[NO-ISSUE] Implement v2
This commit is contained in:
BIN
shopping-crawler/.jqwik-database
Normal file
BIN
shopping-crawler/.jqwik-database
Normal file
Binary file not shown.
42
shopping-crawler/build.gradle
Normal file
42
shopping-crawler/build.gradle
Normal file
@@ -0,0 +1,42 @@
|
||||
dependencies {
    developmentOnly 'org.springframework.boot:spring-boot-devtools'
    runtimeOnly 'com.mysql:mysql-connector-j'
    compileOnly 'org.projectlombok:lombok'

    implementation project(':support')
    // https://projectreactor.io/docs/core/release/reference/#debug-activate
    // Servlet web starter with the embedded Tomcat starter excluded; Undertow is added below.
    implementation("org.springframework.boot:spring-boot-starter-web") {
        exclude group: "org.springframework.boot", module: "spring-boot-starter-tomcat"
    }

    implementation("org.springframework.boot:spring-boot-starter-undertow") {
        exclude group: "io.undertow", module: "undertow-websockets-jsr"
    }
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.cloud:spring-cloud-starter-config'
    implementation 'org.springframework.boot:spring-boot-starter-actuator'
    implementation 'com.rometools:rome:2.1.0'
    implementation 'org.jsoup:jsoup:1.17.2'
    // H2 is declared once, as `implementation`: that scope already puts it on the runtime
    // classpath, so the former additional `runtimeOnly 'com.h2database:h2'` entry was redundant.
    implementation 'com.h2database:h2:2.2.224'
    implementation "org.springframework.cloud:spring-cloud-starter-openfeign"
    implementation "io.github.openfeign:feign-hc5"
    implementation 'org.ahocorasick:ahocorasick:0.6.3'

    // The configuration processor only runs at compile time, so it is declared as an
    // annotation processor and no longer duplicated on the `implementation` classpath.
    annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor'
    annotationProcessor 'org.projectlombok:lombok'

    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation('org.assertj:assertj-core:3.25.3')
    testImplementation("org.jeasy:easy-random-core:5.0.0")
    testCompileOnly 'org.projectlombok:lombok'
    testAnnotationProcessor('org.projectlombok:lombok')
}
|
||||
|
||||
// Runs the test task on the JUnit Platform and logs passed, skipped and failed events.
test {
    useJUnitPlatform()

    testLogging {
        events("passed", "skipped", "failed")
    }
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.myoa.engineering.crawl.shopping;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
|
||||
@EnableScheduling
|
||||
@EnableFeignClients
|
||||
@EnableConfigurationProperties
|
||||
@SpringBootApplication
|
||||
public class ShoppingCrawlerApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(ShoppingCrawlerApplication.class, args);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package com.myoa.engineering.crawl.shopping.configuration.datasource;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.h2.tools.Server;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.context.event.ContextClosedEvent;
|
||||
import org.springframework.context.event.ContextRefreshedEvent;
|
||||
import org.springframework.context.event.EventListener;
|
||||
|
||||
import java.sql.SQLException;
|
||||
|
||||
@Slf4j
|
||||
@Profile({"datasource-local", "datasource-development"})
|
||||
@Configuration
|
||||
public class H2ConsoleConfiguration {
|
||||
|
||||
private Server webServer;
|
||||
|
||||
@Value("${spring.h2.console.port}")
|
||||
private String port;
|
||||
|
||||
@EventListener(ContextRefreshedEvent.class)
|
||||
public void start() throws SQLException {
|
||||
log.info("starting h2 console");
|
||||
this.webServer = Server.createWebServer("-webPort", port, "-tcpAllowOthers").start();
|
||||
}
|
||||
|
||||
@EventListener(ContextClosedEvent.class)
|
||||
public void stop() {
|
||||
log.info("stopping h2 console");
|
||||
this.webServer.stop(); ;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
package com.myoa.engineering.crawl.shopping.configuration.datasource;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.configuration.datasource.properties.DatasourceProperties;
|
||||
import com.myoa.engineering.crawl.shopping.configuration.datasource.properties.HibernateProperties;
|
||||
import com.myoa.engineering.crawl.shopping.configuration.datasource.properties.HikariProperties;
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.BaseScanDomain;
|
||||
import com.myoa.engineering.crawl.shopping.infra.repository.BaseScanRepository;
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import jakarta.persistence.EntityManagerFactory;
|
||||
import lombok.NonNull;
|
||||
import org.hibernate.boot.model.naming.CamelCaseToUnderscoresNamingStrategy;
|
||||
import org.hibernate.boot.model.naming.ImplicitNamingStrategyJpaCompliantImpl;
|
||||
import org.hibernate.cfg.AvailableSettings;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.data.jpa.repository.config.EnableJpaAuditing;
|
||||
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
|
||||
import org.springframework.orm.jpa.JpaTransactionManager;
|
||||
import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean;
|
||||
import org.springframework.transaction.PlatformTransactionManager;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
@Configuration
|
||||
@EnableJpaAuditing
|
||||
@EnableJpaRepositories(basePackageClasses = BaseScanRepository.class,
|
||||
entityManagerFactoryRef = "shoppingCrawlerEntityManagerFactory",
|
||||
transactionManagerRef = "shoppingCrawlerTransactionManager"
|
||||
)
|
||||
public class ShoppingCrawlerDatasourceConfiguration {
|
||||
|
||||
private static final String DATA_SOURCE_UNIT_NAME = "crawler-shopping";
|
||||
|
||||
private final DatasourceProperties dataSourceProeprties;
|
||||
private final HikariProperties hikariProperties;
|
||||
private final HibernateProperties hibernateProperties;
|
||||
|
||||
public ShoppingCrawlerDatasourceConfiguration(DatasourceProperties dataSourceProeprties,
|
||||
HikariProperties hikariProperties,
|
||||
HibernateProperties hibernateProperties) {
|
||||
this.dataSourceProeprties = dataSourceProeprties;
|
||||
this.hikariProperties = hikariProperties;
|
||||
this.hibernateProperties = hibernateProperties;
|
||||
}
|
||||
|
||||
@Bean(name = "shoppingCrawlerDataSource")
|
||||
public DataSource dataSource() {
|
||||
DatasourceProperties.DataSourcePropertiesUnit dataSourcePropertiesUnit = dataSourceProeprties.find(DATA_SOURCE_UNIT_NAME);
|
||||
|
||||
final HikariConfig hikariConfig = new HikariConfig();
|
||||
hikariConfig.setJdbcUrl(dataSourcePropertiesUnit.toCompletedJdbcUrl());
|
||||
hikariConfig.setDriverClassName(dataSourcePropertiesUnit.getDriverClassName());
|
||||
hikariConfig.setUsername(dataSourcePropertiesUnit.getUsername());
|
||||
hikariConfig.setPassword(dataSourcePropertiesUnit.getPassword());
|
||||
hikariConfig.setAutoCommit(hikariProperties.getAutoCommit());
|
||||
hikariConfig.setMaximumPoolSize(hikariProperties.getMaximumPoolSize());
|
||||
hikariConfig.setMinimumIdle(hikariProperties.getMinimumIdle());
|
||||
if (hikariProperties.getMaximumPoolSize() > hikariProperties.getMinimumIdle()) {
|
||||
hikariConfig.setIdleTimeout(hikariProperties.getIdleTimeout());
|
||||
}
|
||||
hikariConfig.setValidationTimeout(hikariProperties.getValidationTimeout());
|
||||
hikariConfig.setConnectionTimeout(hikariProperties.getConnectionTimeout());
|
||||
hikariConfig.setMaxLifetime(hikariProperties.getMaxLifetime());
|
||||
|
||||
final DataSource dataSource = new HikariDataSource(hikariConfig);
|
||||
return dataSource;
|
||||
}
|
||||
|
||||
@Bean("shoppingCrawlerEntityManagerFactory")
|
||||
public LocalContainerEntityManagerFactoryBean entityManagerFactory(
|
||||
EntityManagerFactoryBuilder builder,
|
||||
@Qualifier("shoppingCrawlerDataSource") DataSource dataSource) {
|
||||
return builder.dataSource(dataSource)
|
||||
.packages(BaseScanDomain.class)
|
||||
.properties(getPropsMap(DATA_SOURCE_UNIT_NAME))
|
||||
.build();
|
||||
}
|
||||
|
||||
@Bean("shoppingCrawlerTransactionManager")
|
||||
public PlatformTransactionManager transactionManager(
|
||||
@Qualifier("shoppingCrawlerEntityManagerFactory") EntityManagerFactory entityManagerFactory) {
|
||||
return new JpaTransactionManager(entityManagerFactory);
|
||||
}
|
||||
|
||||
public static Properties getProps(@NonNull HibernateProperties.HibernatePropertiesUnit hibernateProperties) {
|
||||
Properties properties = new Properties();
|
||||
properties.put(AvailableSettings.DIALECT, hibernateProperties.getDialect());
|
||||
properties.put(AvailableSettings.FORMAT_SQL, hibernateProperties.getFormatSql());
|
||||
properties.put(AvailableSettings.SHOW_SQL, hibernateProperties.getShowSql());
|
||||
properties.put(AvailableSettings.HBM2DDL_AUTO, hibernateProperties.getHbm2ddlAuto());
|
||||
properties.put(AvailableSettings.CONNECTION_PROVIDER_DISABLES_AUTOCOMMIT, hibernateProperties.getDisableAutoCommit());
|
||||
properties.put(AvailableSettings.IMPLICIT_NAMING_STRATEGY, ImplicitNamingStrategyJpaCompliantImpl.class.getName());
|
||||
properties.put(AvailableSettings.PHYSICAL_NAMING_STRATEGY, CamelCaseToUnderscoresNamingStrategy.class.getName());
|
||||
properties.put(AvailableSettings.GENERATE_STATISTICS, "false");
|
||||
properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS, "true");
|
||||
properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS_SKIP_COLUMN_DEFINITIONS, "true");
|
||||
properties.put(AvailableSettings.STATEMENT_BATCH_SIZE, "20");
|
||||
properties.put(AvailableSettings.ORDER_INSERTS, "true");
|
||||
properties.put(AvailableSettings.ORDER_UPDATES, "true");
|
||||
properties.put(AvailableSettings.BATCH_VERSIONED_DATA, "true");
|
||||
// properties.put(AvailableSettings.JPA_ID_GENERATOR_GLOBAL_SCOPE_COMPLIANCE, "false");
|
||||
return properties;
|
||||
}
|
||||
|
||||
public Map<String, String> getPropsMap(@NonNull String unitName) {
|
||||
return convertPropertiestoMaps(getProps(hibernateProperties.find(unitName)));
|
||||
}
|
||||
|
||||
public Map<String, String> convertPropertiestoMaps(Properties properties) {
|
||||
Map<String, String> propertiesMap = new HashMap<>();
|
||||
|
||||
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements(); ) {
|
||||
String key = (String) e.nextElement();
|
||||
propertiesMap.put(key, properties.getProperty(key));
|
||||
}
|
||||
return propertiesMap;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.myoa.engineering.crawl.shopping.configuration.datasource.properties;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.support.util.ObjectUtil;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Data
|
||||
@ConfigurationProperties(prefix = "datasource")
|
||||
public class DatasourceProperties {
|
||||
|
||||
private List<DataSourcePropertiesUnit> units;
|
||||
|
||||
@Data
|
||||
public static class DataSourcePropertiesUnit {
|
||||
|
||||
private String unitName;
|
||||
private String schemaName;
|
||||
private String connectionParameters;
|
||||
private String dbConnectionUrl;
|
||||
private Boolean isSimpleConnectionUrl;
|
||||
private String username;
|
||||
private String password;
|
||||
private String driverClassName;
|
||||
|
||||
public String toCompletedJdbcUrl() {
|
||||
if (ObjectUtil.isEmpty(isSimpleConnectionUrl) || isSimpleConnectionUrl == false) {
|
||||
return String.format("%s/%s?%s", dbConnectionUrl, schemaName, connectionParameters);
|
||||
}
|
||||
return dbConnectionUrl;
|
||||
}
|
||||
}
|
||||
|
||||
public DataSourcePropertiesUnit find(String unitName) {
|
||||
return units.stream()
|
||||
.filter(e -> e.getUnitName().equals(unitName))
|
||||
.findFirst()
|
||||
.orElseThrow(
|
||||
() -> new IllegalArgumentException(this.getClass().getName() + ": unitName Not found. " + unitName));
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package com.myoa.engineering.crawl.shopping.configuration.datasource.properties;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Setter
|
||||
@Getter
|
||||
@ConfigurationProperties(prefix = "hibernate")
|
||||
public class HibernateProperties {
|
||||
|
||||
private List<HibernatePropertiesUnit> units;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public static class HibernatePropertiesUnit {
|
||||
|
||||
private String unitName;
|
||||
private String dialect;
|
||||
private String formatSql;
|
||||
private String showSql;
|
||||
private String hbm2ddlAuto;
|
||||
private String disableAutoCommit;
|
||||
|
||||
}
|
||||
|
||||
public HibernatePropertiesUnit find(String unitName) {
|
||||
return units.stream()
|
||||
.filter(x -> x.getUnitName().equals(unitName))
|
||||
.findFirst()
|
||||
.orElseThrow(
|
||||
() -> new IllegalArgumentException(this.getClass().getName() + ": unitName Not found. " + unitName));
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.myoa.engineering.crawl.shopping.configuration.datasource.properties;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@Setter
|
||||
@Getter
|
||||
@ConfigurationProperties(prefix = "spring.datasource.hikari")
|
||||
public class HikariProperties {
|
||||
|
||||
private Integer minimumIdle;
|
||||
private Integer maximumPoolSize;
|
||||
private Integer idleTimeout;
|
||||
private Integer validationTimeout;
|
||||
private Integer connectionTimeout;
|
||||
private Integer maxLifetime;
|
||||
private Boolean autoCommit;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.myoa.engineering.crawl.shopping.configuration.slack.properties;
|
||||
|
||||
import java.util.List;
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "slack.bot")
|
||||
public class SlackSecretProperties {
|
||||
|
||||
private List<SlackSecretPropertiesUnit> units;
|
||||
|
||||
@Data
|
||||
public static class SlackSecretPropertiesUnit {
|
||||
private String botUnitName;
|
||||
private String username;
|
||||
private String iconEmoji;
|
||||
private String channel;
|
||||
private String token;
|
||||
}
|
||||
|
||||
public SlackSecretPropertiesUnit find(String botUnitName) {
|
||||
return units.stream()
|
||||
.filter(e -> e.getBotUnitName().equals(botUnitName))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new IllegalArgumentException("not found bot unit name : " + botUnitName));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.myoa.engineering.crawl.shopping.controller;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.crawlhandler.PpomppuCrawlDomesticHandler;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/exploit")
|
||||
public class TestAPIController {
|
||||
|
||||
private final PpomppuCrawlDomesticHandler ppomppuCrawlDomesticHandler;
|
||||
|
||||
public TestAPIController(PpomppuCrawlDomesticHandler ppomppuCrawlDomesticHandler) {
|
||||
this.ppomppuCrawlDomesticHandler = ppomppuCrawlDomesticHandler;
|
||||
}
|
||||
|
||||
@GetMapping("/triggers")
|
||||
public void triggerExploit() {
|
||||
ppomppuCrawlDomesticHandler.handle();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.myoa.engineering.crawl.shopping.crawlhandler;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
|
||||
public interface CrawlHandler {
|
||||
|
||||
CrawlTarget getCrawlTarget();
|
||||
|
||||
void handle();
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.myoa.engineering.crawl.shopping.crawlhandler;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
public class FmkoreaCrawlHandler implements CrawlHandler {
|
||||
@Override
|
||||
public CrawlTarget getCrawlTarget() {
|
||||
return CrawlTarget.FMKOREA;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handle() {
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package com.myoa.engineering.crawl.shopping.crawlhandler;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.crawlhandler.parser.PpomppuArticleParserV2;
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.Article;
|
||||
import com.myoa.engineering.crawl.shopping.infra.client.ppomppu.PpomppuBoardClientV2;
|
||||
import com.myoa.engineering.crawl.shopping.service.ArticleCommandService;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Component
|
||||
public class PpomppuCrawlDomesticHandler implements CrawlHandler {
|
||||
|
||||
private final PpomppuBoardClientV2 ppomppuBoardClient;
|
||||
private final PpomppuArticleParserV2 ppomppuArticleParserV2;
|
||||
private final ArticleCommandService articleCommandService;
|
||||
|
||||
public PpomppuCrawlDomesticHandler(PpomppuBoardClientV2 ppomppuBoardClient,
|
||||
PpomppuArticleParserV2 ppomppuArticleParserV2,
|
||||
ArticleCommandService articleCommandService) {
|
||||
this.ppomppuBoardClient = ppomppuBoardClient;
|
||||
this.ppomppuArticleParserV2 = ppomppuArticleParserV2;
|
||||
this.articleCommandService = articleCommandService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CrawlTarget getCrawlTarget() {
|
||||
return CrawlTarget.PPOMPPU_DOMESTIC;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handle() {
|
||||
|
||||
String boardHtmlPage1 = ppomppuBoardClient.getBoardHtml("/zboard/zboard.php", generateRequestParams(1));
|
||||
List<Article> parsedPage1 = ppomppuArticleParserV2.parse(boardHtmlPage1);
|
||||
|
||||
String boardHtmlPage2 = ppomppuBoardClient.getBoardHtml("/zboard/zboard.php", generateRequestParams(2));
|
||||
List<Article> parsedPage2 = ppomppuArticleParserV2.parse(boardHtmlPage2);
|
||||
|
||||
List<Article> merged = Stream.of(parsedPage1, parsedPage2)
|
||||
.flatMap(List::stream)
|
||||
.map(e -> e.updateCrawlTarget(getCrawlTarget()))
|
||||
.toList();
|
||||
|
||||
articleCommandService.upsert(merged);
|
||||
}
|
||||
|
||||
private Map<String, String> generateRequestParams(int pageId) {
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("id", "ppomppu");
|
||||
params.put("page", String.valueOf(pageId));
|
||||
return params;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
package com.myoa.engineering.crawl.shopping.crawlhandler.parser;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.Article;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import com.myoa.engineering.crawl.shopping.util.DateTimeUtils;
|
||||
import com.myoa.engineering.crawl.shopping.util.NumberUtils;
|
||||
import io.micrometer.core.instrument.util.StringUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
public final class PpomppuArticleParserV2 {
|
||||
|
||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss")
|
||||
.withZone(ZoneId.of("Asia/Seoul"));
|
||||
|
||||
private PpomppuArticleParserV2() {
|
||||
}
|
||||
|
||||
public List<Article> parse(String html) {
|
||||
Elements trElements = converHtmlToTrElements(html);
|
||||
return trElements.stream()
|
||||
.filter(this::isRealArticle)
|
||||
.map(this::parse)
|
||||
.toList();
|
||||
|
||||
}
|
||||
|
||||
private Elements converHtmlToTrElements(String data) {
|
||||
Document document = Jsoup.parse(data);
|
||||
Elements trList = document.getElementById("revolution_main_table").getElementsByTag("tr");
|
||||
return trList;
|
||||
}
|
||||
|
||||
private boolean isRealArticle(Element tr) {
|
||||
Elements tdList = tr.getElementsByTag("td");
|
||||
if (tdList.size() != 6) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hasOnlyNumeric(tdList.get(0))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private Pattern pattern_numeric = Pattern.compile("\\d+");
|
||||
|
||||
private boolean hasOnlyNumeric(Element td) {
|
||||
return pattern_numeric.matcher(td.text()).matches();
|
||||
}
|
||||
|
||||
private Article parse(Element tr) {
|
||||
Elements tdList = tr.getElementsByTag("td");
|
||||
Long articleId = Long.parseLong(tdList.get(0).text());
|
||||
|
||||
String title = tdList.get(2).text();
|
||||
String articleUrl = parseArticleUrl(tdList.get(2).getElementsByTag("a").attr("href"));
|
||||
String boardName = parseBoardName(title);
|
||||
Integer recommended = parseRecommended(tdList.get(4));
|
||||
Integer hit = NumberUtils.parseInt(tdList.get(5).text(), 0);
|
||||
ZonedDateTime registeredAt = DateTimeUtils.parse(tdList.get(3).text());
|
||||
|
||||
return Article.builder()
|
||||
.articleId(articleId)
|
||||
.title(title)
|
||||
.boardName(boardName)
|
||||
.articleUrl(articleUrl)
|
||||
.recommended(recommended)
|
||||
.hit(hit)
|
||||
.registeredAt(registeredAt)
|
||||
.build();
|
||||
}
|
||||
|
||||
public Integer parseRecommended(Element td) {
|
||||
final String voteString = td.text();
|
||||
if (StringUtils.isEmpty(voteString)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final int voteUp = Integer.parseInt(td.text().split(" - ")[0]);
|
||||
final int voteDown = Integer.parseInt(td.text().split(" - ")[1]);
|
||||
int recommended = voteUp - voteDown;
|
||||
return recommended;
|
||||
}
|
||||
|
||||
public static String parseArticleUrl(String data) {
|
||||
return PpomppuBoardName.ofViewPageUrl(data);
|
||||
}
|
||||
|
||||
Pattern patternBoardName = Pattern.compile("\\[(.+?)\\]");
|
||||
|
||||
public String parseBoardName(String fullTitle) {
|
||||
Matcher matcher = patternBoardName.matcher(fullTitle);
|
||||
String lastMatched = null;
|
||||
while (matcher.find()) {
|
||||
lastMatched = matcher.group(1);
|
||||
}
|
||||
return lastMatched;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.springframework.data.annotation.CreatedDate;
|
||||
import org.springframework.data.annotation.LastModifiedDate;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import org.springframework.data.jpa.domain.support.AuditingEntityListener;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.Instant;
|
||||
|
||||
@Getter
|
||||
@MappedSuperclass
|
||||
@EntityListeners(AuditingEntityListener.class)
|
||||
public abstract class Auditable implements Serializable {
|
||||
private static final long serialVersionUID = -7105030870015828551L;
|
||||
|
||||
@Column
|
||||
@CreatedDate
|
||||
private Instant createdAt;
|
||||
|
||||
@Column
|
||||
@LastModifiedDate
|
||||
private Instant modifiedAt;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity;
|
||||
|
||||
/**
 * Marker interface without members, located in the entity base package.
 *
 * <p>It exists so that configuration code can reference this package by type
 * (for example when selecting packages to scan) rather than by a string name.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
public interface BaseScanDomain {
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.ToString;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
|
||||
@ToString
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
@Entity
|
||||
@Table(name = "ppomppu_article")
|
||||
public class PpomppuArticle extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private Long articleId;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private PpomppuBoardName boardName;
|
||||
|
||||
@Column
|
||||
private String articleUrl;
|
||||
|
||||
@Column
|
||||
private String title;
|
||||
|
||||
@Column
|
||||
private Integer hit;
|
||||
|
||||
@Column
|
||||
private Integer recommended;
|
||||
|
||||
@Column
|
||||
private ZonedDateTime registeredAt;
|
||||
|
||||
@Builder
|
||||
public PpomppuArticle(Long id, Long articleId, PpomppuBoardName boardName, String articleUrl,
|
||||
String title, Integer recommended, Integer hit, ZonedDateTime registeredAt) {
|
||||
this.id = id;
|
||||
this.articleId = articleId;
|
||||
this.boardName = boardName;
|
||||
this.articleUrl = articleUrl;
|
||||
this.title = title;
|
||||
this.recommended = recommended;
|
||||
this.hit = hit;
|
||||
this.registeredAt = registeredAt;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
@Entity
|
||||
@Table(name = "ppomppu_board_feed_status")
|
||||
public class PpomppuBoardFeedStatus extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private Long latestParsedArticleId;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private PpomppuBoardName boardName;
|
||||
|
||||
@Column
|
||||
private Instant updatedAt;
|
||||
|
||||
public static PpomppuBoardFeedStatus of(PpomppuBoardName boardName, Long latestArticleId) {
|
||||
return PpomppuBoardFeedStatus.builder()
|
||||
.boardName(boardName)
|
||||
.latestParsedArticleId(latestArticleId)
|
||||
.updatedAt(Instant.now())
|
||||
.build();
|
||||
}
|
||||
|
||||
public void updateArticleId(Long latestArticleId) {
|
||||
this.updatedAt = Instant.now();
|
||||
this.latestParsedArticleId = latestArticleId;
|
||||
}
|
||||
|
||||
@Builder
|
||||
public PpomppuBoardFeedStatus(Long id, Long latestParsedArticleId, PpomppuBoardName boardName, Instant updatedAt) {
|
||||
this.id = id;
|
||||
this.latestParsedArticleId = latestParsedArticleId;
|
||||
this.boardName = boardName;
|
||||
this.updatedAt = updatedAt;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import java.time.Instant;
|
||||
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
@Entity
|
||||
@Table(name = "published_history")
|
||||
public class PublishedHistory extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private Long userId;
|
||||
|
||||
@Column
|
||||
private String boardNameList;
|
||||
|
||||
@Column
|
||||
private Instant publishedAt;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
@Entity
|
||||
@Table(name = "subscribed_board")
|
||||
public class SubscribedBoard extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private Long userId;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private PpomppuBoardName boardName;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import java.time.Instant;
|
||||
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
@Entity
|
||||
@Table(name = "subscribed_user")
|
||||
public class SubscribedUser extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private Long userId;
|
||||
|
||||
@Column
|
||||
private Instant registeredAt;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Entity
|
||||
@Table
|
||||
public class AppUser extends Auditable {
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private String name;
|
||||
|
||||
@Column
|
||||
private Boolean enabled;
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Entity
|
||||
@Table
|
||||
public class Article extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private Long articleId;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private CrawlTarget crawlTarget;
|
||||
|
||||
@Column
|
||||
private String boardName;
|
||||
|
||||
@Column
|
||||
private String articleUrl;
|
||||
|
||||
@Column
|
||||
private String title;
|
||||
|
||||
@Column
|
||||
private Integer hit;
|
||||
|
||||
@Column
|
||||
private Integer recommended;
|
||||
|
||||
@Column
|
||||
private ZonedDateTime registeredAt;
|
||||
|
||||
public Article update(Article article) {
|
||||
this.boardName = article.getBoardName();
|
||||
this.articleUrl = article.getArticleUrl();
|
||||
this.title = article.getTitle();
|
||||
this.hit = article.getHit();
|
||||
this.recommended = article.getRecommended();
|
||||
return this;
|
||||
}
|
||||
|
||||
public Article updateCrawlTarget(CrawlTarget crawlTarget) {
|
||||
this.crawlTarget = crawlTarget;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.entity.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Entity
|
||||
@Table
|
||||
public class SubscribedKeyword extends Auditable {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Long id;
|
||||
|
||||
@Column
|
||||
private String keyword;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private CrawlTarget crawlTarget;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.myoa.engineering.crawl.shopping.domain.model.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ArticleModel {
|
||||
|
||||
private Long id;
|
||||
private Long articleId;
|
||||
private CrawlTarget crawlTarget;
|
||||
private String boardName;
|
||||
private String articleUrl;
|
||||
private String title;
|
||||
private Integer hit;
|
||||
private Integer recommended;
|
||||
private ZonedDateTime registeredAt;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
package com.myoa.engineering.crawl.shopping.dto;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.Instant;
|
||||
|
||||
/**
|
||||
* FeedParsedResult
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
public class FeedParsedResult implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = -3771310078623481348L;
|
||||
|
||||
private PpomppuBoardName boardName;
|
||||
private Instant requestedAt;
|
||||
private Instant processedAt;
|
||||
|
||||
@Builder
|
||||
public FeedParsedResult(PpomppuBoardName boardName, Instant requestedAt, Instant processedAt) {
|
||||
this.boardName = boardName;
|
||||
this.requestedAt = requestedAt;
|
||||
this.processedAt = processedAt;
|
||||
}
|
||||
|
||||
public static FeedParsedResult of(PpomppuBoardName boardName) {
|
||||
return FeedParsedResult.builder()
|
||||
.boardName(boardName)
|
||||
.requestedAt(Instant.now())
|
||||
.build();
|
||||
}
|
||||
|
||||
public FeedParsedResult done() {
|
||||
this.processedAt = Instant.now();
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.myoa.engineering.crawl.shopping.dto;
|
||||
|
||||
/**
 * Placeholder DTO for a Ppomppu article; it currently declares no fields or methods.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
public class PpomppuArticleDTO {
    // intentionally empty
}
|
||||
@@ -0,0 +1,53 @@
|
||||
package com.myoa.engineering.crawl.shopping.dto;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.SimpleMessageDTO;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* PpomppuArticleTransformer
|
||||
*
|
||||
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
|
||||
* @since 2021-11-21
|
||||
*/
|
||||
public final class PpomppuArticleTransformer {
|
||||
|
||||
private PpomppuArticleTransformer() {
|
||||
}
|
||||
|
||||
private static final String MESSAGE_FORMAT_V1 = "%s)) `%s` <%s:LINK>";
|
||||
|
||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
.withZone(ZoneId.of("Asia/Seoul"));
|
||||
/*
|
||||
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = entity ->
|
||||
SimpleMessageDTO.builder()
|
||||
.requestedAt(Instant.now())
|
||||
.publishedAt(entity.getRegisteredAt())
|
||||
.title(String.format(MESSAGE_FORMAT_V1, entity.getBoardName().getMenuName(), entity.getTitle()))
|
||||
.body(entity.getArticleUrl())
|
||||
.build();
|
||||
*/
|
||||
// https://stackoverflow.com/questions/24882927/using-streams-to-convert-a-list-of-objects-into-a-string-obtained-from-the-tostr
|
||||
public static SimpleMessageDTO transform(List<PpomppuArticle> articles) {
|
||||
Instant requestedAt = Instant.now();
|
||||
String body = articles.stream()
|
||||
.map(PpomppuArticleTransformer::convertToInlineMessage)
|
||||
.collect(Collectors.joining("\n\n"));
|
||||
return SimpleMessageDTO.builder()
|
||||
.requestedAt(requestedAt)
|
||||
.title(DATE_TIME_FORMATTER.format(requestedAt))
|
||||
.body(body)
|
||||
.build();
|
||||
}
|
||||
|
||||
public static String convertToInlineMessage(PpomppuArticle article) {
|
||||
return String.format(MESSAGE_FORMAT_V1,
|
||||
article.getBoardName().getMenuName(), article.getTitle(), article.getArticleUrl());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.myoa.engineering.crawl.shopping.dto.constant;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
* WebClientPropertiesUnitName
|
||||
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
|
||||
* @since 2021-11-18
|
||||
*
|
||||
*/
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
public enum WebClientPropertiesUnitName {
|
||||
PPOMPPU_NOTIFIER_SENDER_API("ppn-sender-api"),
|
||||
;
|
||||
|
||||
private String unitName;
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.myoa.engineering.crawl.shopping.dto.slack;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Marker interface for serializable message payloads.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-14
 */
public interface MessageDTO extends Serializable {
    // marker only: declares no members
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package com.myoa.engineering.crawl.shopping.dto.slack;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* SlackMessageDTO
|
||||
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
|
||||
* @since 2021-11-14
|
||||
*
|
||||
*/
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
public class SlackMessageDTO implements MessageDTO {
|
||||
|
||||
private final static long serialVersionUID = 4737608709660494713L;
|
||||
|
||||
private String text;
|
||||
private String channel;
|
||||
private String username;
|
||||
|
||||
@JsonProperty("icon_emoji")
|
||||
private String iconEmoji;
|
||||
|
||||
@Builder
|
||||
public SlackMessageDTO(String text, String channel, String username, String iconEmoji) {
|
||||
this.text = text;
|
||||
this.channel = channel;
|
||||
this.username = username;
|
||||
this.iconEmoji = iconEmoji;
|
||||
}
|
||||
|
||||
public void applyText(String text) {
|
||||
this.text = text;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.myoa.engineering.crawl.shopping.event;
|
||||
|
||||
import org.springframework.context.ApplicationEvent;
|
||||
|
||||
public class ArticleUpsertEvent extends ApplicationEvent {
|
||||
|
||||
public ArticleUpsertEvent(Object source) {
|
||||
super(source);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.myoa.engineering.crawl.shopping.event;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.model.v2.ArticleModel;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
@Deprecated
|
||||
public class ArticleUpsertEventCommand {
|
||||
private final List<ArticleModel> articles;
|
||||
private final CrawlTarget crawlTarget;
|
||||
|
||||
public ArticleUpsertEventCommand(List<ArticleModel> articles, CrawlTarget crawlTarget) {
|
||||
this.articles = articles;
|
||||
this.crawlTarget = crawlTarget;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.myoa.engineering.crawl.shopping.event.handler;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.event.ArticleUpsertEvent;
|
||||
import com.myoa.engineering.crawl.shopping.service.SubscribedKeywordQueryService;
|
||||
import org.ahocorasick.trie.Trie;
|
||||
import org.springframework.context.event.EventListener;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class ArticleUpsertEventListener {
|
||||
|
||||
private final SubscribedKeywordQueryService subscribedKeywordQueryService;
|
||||
|
||||
public ArticleUpsertEventListener(SubscribedKeywordQueryService subscribedKeywordQueryService) {
|
||||
this.subscribedKeywordQueryService = subscribedKeywordQueryService;
|
||||
}
|
||||
|
||||
@EventListener
|
||||
public void handleArticleUpsertEvent(ArticleUpsertEvent event) {
|
||||
|
||||
System.out.println("event = " + event);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* PpomppuNotifierSenderAPIClient
|
||||
*
|
||||
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
|
||||
* @since 2021-11-17
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class MessageSenderAPIClient {
|
||||
/*
|
||||
private final WebClient webClient;
|
||||
|
||||
public MessageSenderAPIClient(WebClientProperties webClientProperties) {
|
||||
WebClientPropertiesUnit webClientPropertiesUnit =
|
||||
webClientProperties.find(WebClientPropertiesUnitName.PPOMPPU_NOTIFIER_SENDER_API.getUnitName());
|
||||
this.webClient = WebClient.builder()
|
||||
.baseUrl(webClientPropertiesUnit.getBaseUrl())
|
||||
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofDefault())
|
||||
.defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
|
||||
// .filter(WebClientFilterFactory.logRequest())
|
||||
// .filter(WebClientFilterFactory.logResponse())
|
||||
.build();
|
||||
}
|
||||
|
||||
public Mono<String> sendMessageToSlack(SimpleMessageDTO dto) {
|
||||
return webClient.post()
|
||||
.uri("/api/v1/messages/sendMessage/messengers/slack")
|
||||
.bodyValue(dto)
|
||||
.exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference<String>() {}))
|
||||
.publishOn(Schedulers.boundedElastic())
|
||||
.onErrorResume(WebClientRequestException.class, t -> {
|
||||
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
|
||||
return Mono.empty();
|
||||
});
|
||||
}
|
||||
|
||||
*/
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client.fmkorea;
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
|
||||
@FeignClient(value = "fmkorea-board-client", url = "https://fmkorea.com")
|
||||
public interface FmkoreaBoardClient {
|
||||
|
||||
@GetMapping("{boardLink}")
|
||||
String getBoardHtml(@PathVariable("boardLink") String boardLink);
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client.ppomppu;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* PpomppuBoardFeedRetriever
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class PpomppuBoardClient {
|
||||
/*
|
||||
private final WebClient webClient;
|
||||
|
||||
public PpomppuBoardClient(WebClient.Builder webClientBuilder) {
|
||||
this.webClient = webClientBuilder.baseUrl(PpomppuBoardName.PPOMPPU_URL)
|
||||
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml())
|
||||
.filter(WebClientFilterFactory.logRequest())
|
||||
.filter(WebClientFilterFactory.logResponse())
|
||||
.build();
|
||||
}
|
||||
|
||||
public Mono<String> getHtml(String uri) {
|
||||
return webClient.get()
|
||||
.uri(uri)
|
||||
.exchangeToMono(e -> e.bodyToMono(String.class))
|
||||
.publishOn(Schedulers.boundedElastic())
|
||||
.onErrorResume(WebClientRequestException.class, t -> {
|
||||
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
|
||||
return Mono.empty();
|
||||
});
|
||||
// .doOnNext(e -> log.info("[getHtml] {}", e));
|
||||
}
|
||||
*/
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client.ppomppu;
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
import org.springframework.cloud.openfeign.SpringQueryMap;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@FeignClient(value = "ppomppu-board-client", url = "https://www.ppomppu.co.kr")
|
||||
public interface PpomppuBoardClientV2 {
|
||||
|
||||
@GetMapping("{boardLink}")
|
||||
String getBoardHtml(@PathVariable("boardLink") String boardLink,
|
||||
@SpringQueryMap Map<String, String> params);
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client.slack;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.configuration.slack.properties.SlackSecretProperties;
|
||||
import com.myoa.engineering.crawl.shopping.dto.slack.SlackMessageDTO;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
public class MongeShoppingBotSlackMessageSender {
|
||||
|
||||
private static final String SLACK_SECRET_UNIT_NAME = "shopping-crawler";
|
||||
|
||||
private final SlackSecretProperties.SlackSecretPropertiesUnit slackProperties;
|
||||
private final SlackAPIClient slackAPIClient;
|
||||
private final String token;
|
||||
|
||||
public MongeShoppingBotSlackMessageSender(SlackAPIClient slackAPIClient,
|
||||
SlackSecretProperties slackSecretProperties) {
|
||||
this.slackAPIClient = slackAPIClient;
|
||||
this.slackProperties = slackSecretProperties.find(SLACK_SECRET_UNIT_NAME);
|
||||
this.token = slackProperties.getToken();
|
||||
}
|
||||
|
||||
public SlackMessageDTO ofMessageTemplate() {
|
||||
return SlackMessageDTO.builder()
|
||||
.channel(slackProperties.getChannel())
|
||||
.iconEmoji(slackProperties.getIconEmoji())
|
||||
.username(slackProperties.getUsername())
|
||||
.build();
|
||||
}
|
||||
|
||||
public SlackMessageDTO ofMessage(String text) {
|
||||
return SlackMessageDTO.builder()
|
||||
.channel(slackProperties.getChannel())
|
||||
.iconEmoji(slackProperties.getIconEmoji())
|
||||
.username(slackProperties.getUsername())
|
||||
.text(text)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client.slack;
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
@FeignClient(value = "slack-api-client", url = "https://slack.com")
|
||||
public interface SlackAPIClient {
|
||||
|
||||
@PostMapping("/api/v1/messages/sendMessage/messengers/slack")
|
||||
String sendMessage();
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.client.slack;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* SlackMessageSender
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
@Slf4j
|
||||
public class SlackMessageSender { /* implements MessageSender<SlackMessageDTO> {*/
|
||||
|
||||
/*
|
||||
private static final String SLACK_API_URL = "https://slack.com/api";
|
||||
|
||||
private final WebClient webClient;
|
||||
|
||||
public SlackMessageSender(String apiSecret) {
|
||||
this.webClient = WebClient.builder()
|
||||
.baseUrl(SLACK_API_URL)
|
||||
.defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + apiSecret)
|
||||
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofDefault())
|
||||
.defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_UTF8_VALUE)
|
||||
.defaultHeader(HttpHeaders.ACCEPT_CHARSET, "UTF-8")
|
||||
.filter(WebClientFilterFactory.logResponse())
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Mono<String> sendMessage(SlackMessageDTO message) {
|
||||
return webClient.post()
|
||||
.uri("/chat.postMessage")
|
||||
.bodyValue(message)
|
||||
.exchangeToMono(e -> e.bodyToMono(String.class))
|
||||
.publishOn(Schedulers.boundedElastic())
|
||||
.onErrorResume(WebClientRequestException.class, t -> {
|
||||
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
|
||||
return Mono.empty();
|
||||
});
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.repository;
|
||||
|
||||
/**
 * Base repository interface; it currently declares no members.
 */
public interface BaseScanRepository {
    // intentionally empty
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.repository.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
@Repository
|
||||
public interface PpomppuArticleRepository extends JpaRepository<PpomppuArticle, Long> {
|
||||
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.repository.v1;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuBoardFeedStatus;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
@Repository
|
||||
public interface PpomppuBoardFeedStatusRepository extends JpaRepository<PpomppuBoardFeedStatus, Long> {
|
||||
|
||||
Optional<PpomppuBoardFeedStatus> findByBoardName(PpomppuBoardName boardName);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.repository.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.AppUser;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
@Repository
|
||||
public interface AppUserRepository extends JpaRepository<AppUser, Long> {
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.repository.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.Article;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
@Repository
|
||||
public interface ArticleRepository extends JpaRepository<Article, Long> {
|
||||
|
||||
Optional<Article> findByArticleId(Long articleId);
|
||||
List<Article> findByArticleIdIn(Collection<Long> articleId);
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.myoa.engineering.crawl.shopping.infra.repository.v2;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.SubscribedKeyword;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Repository
|
||||
public interface SubscribedKeywordRepository extends JpaRepository<SubscribedKeyword, Long> {
|
||||
|
||||
List<SubscribedKeyword> findByCrawlTarget(CrawlTarget crawlTarget);
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.myoa.engineering.crawl.shopping.scheduler;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.crawlhandler.CrawlHandler;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@EnableScheduling
|
||||
public class ParseEventEmitter {
|
||||
private final List<CrawlHandler> crawlHandlers;
|
||||
|
||||
|
||||
public ParseEventEmitter(List<CrawlHandler> crawlHandlers) {
|
||||
this.crawlHandlers = crawlHandlers;
|
||||
}
|
||||
|
||||
// @Scheduled(cron = "0 0/5 * * * ?")
|
||||
public void emit() {
|
||||
log.info("[emitDomesticBoard] trigger fired!");
|
||||
crawlHandlers.forEach(CrawlHandler::handle);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
package com.myoa.engineering.crawl.shopping.service;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.Article;
|
||||
import com.myoa.engineering.crawl.shopping.domain.model.v2.ArticleModel;
|
||||
import com.myoa.engineering.crawl.shopping.event.ArticleUpsertEvent;
|
||||
import com.myoa.engineering.crawl.shopping.infra.repository.v2.ArticleRepository;
|
||||
import jakarta.transaction.Transactional;
|
||||
import org.springframework.context.ApplicationEventPublisher;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service
|
||||
public class ArticleCommandService {
|
||||
|
||||
private final ArticleRepository articleRepository;
|
||||
private final ApplicationEventPublisher applicationEventPublisher;
|
||||
|
||||
public ArticleCommandService(ArticleRepository articleRepository,
|
||||
ApplicationEventPublisher applicationEventPublisher) {
|
||||
this.articleRepository = articleRepository;
|
||||
this.applicationEventPublisher = applicationEventPublisher;
|
||||
}
|
||||
|
||||
|
||||
@Transactional
|
||||
public void upsert(List<Article> articles) {
|
||||
Map<Long, Article> articleMap = articles.stream()
|
||||
.collect(Collectors.toMap(Article::getArticleId, e -> e));
|
||||
|
||||
List<Article> saved = articleRepository.findByArticleIdIn(articleMap.keySet());
|
||||
List<Article> updated = saved.stream()
|
||||
.filter(e -> articleMap.containsKey(e.getArticleId()))
|
||||
.map(e -> e.update(articleMap.get(e.getArticleId())))
|
||||
.peek(e -> articleMap.remove(e.getArticleId()))
|
||||
.sorted(Comparator.comparing(Article::getArticleId))
|
||||
.toList();
|
||||
|
||||
List<Article> newArticles = articleMap.values()
|
||||
.stream()
|
||||
.sorted(Comparator.comparing(Article::getArticleId))
|
||||
.toList();
|
||||
|
||||
articleRepository.saveAll(updated);
|
||||
articleRepository.saveAll(newArticles);
|
||||
|
||||
publish(newArticles);
|
||||
}
|
||||
|
||||
private void publish(List<Article> articles) {
|
||||
List<ArticleModel> articleModels =
|
||||
articles.stream()
|
||||
.map(transformer)
|
||||
.toList();
|
||||
applicationEventPublisher.publishEvent(new ArticleUpsertEvent(articleModels));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void upsert(Article article) {
|
||||
Article saved = articleRepository.findByArticleId(article.getArticleId())
|
||||
.orElse(article);
|
||||
articleRepository.save(saved);
|
||||
}
|
||||
|
||||
public static Function<Article, ArticleModel> transformer =
|
||||
article -> ArticleModel.builder()
|
||||
.id(article.getId())
|
||||
.articleId(article.getArticleId())
|
||||
.crawlTarget(article.getCrawlTarget())
|
||||
.boardName(article.getBoardName())
|
||||
.articleUrl(article.getArticleUrl())
|
||||
.title(article.getTitle())
|
||||
.hit(article.getHit())
|
||||
.recommended(article.getRecommended())
|
||||
.registeredAt(article.getRegisteredAt())
|
||||
.build();
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.myoa.engineering.crawl.shopping.service;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* MessageSenderService
|
||||
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
|
||||
* @since 2021-11-21
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class MessageSenderService {
|
||||
|
||||
/*
|
||||
private final MessageSenderAPIClient messageSenderAPIClient;
|
||||
|
||||
public MessageSenderService(MessageSenderAPIClient messageSenderAPIClient) {
|
||||
this.messageSenderAPIClient = messageSenderAPIClient;
|
||||
}
|
||||
|
||||
public String sendMessageToSlack(PpomppuArticle article) {
|
||||
return messageSenderAPIClient.sendMessageToSlack(PpomppuArticleTransformer.TRANSFORM_TO_MESSAGE_DTO.apply(article));
|
||||
}
|
||||
|
||||
public String sendMessageToSlack(List<PpomppuArticle> articles) {
|
||||
return messageSenderAPIClient.sendMessageToSlack(PpomppuArticleTransformer.transform(articles));
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.myoa.engineering.crawl.shopping.service;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuBoardFeedStatus;
|
||||
import com.myoa.engineering.crawl.shopping.infra.repository.v1.PpomppuArticleRepository;
|
||||
import com.myoa.engineering.crawl.shopping.infra.repository.v1.PpomppuBoardFeedStatusRepository;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
public class PpomppuArticleService {
|
||||
|
||||
private final PpomppuArticleRepository ppomppuArticleRepository;
|
||||
|
||||
private final PpomppuBoardFeedStatusRepository ppomppuBoardFeedStatusRepository;
|
||||
|
||||
public PpomppuArticleService(PpomppuArticleRepository ppomppuArticleRepository,
|
||||
PpomppuBoardFeedStatusRepository ppomppuBoardFeedStatusRepository) {
|
||||
this.ppomppuArticleRepository = ppomppuArticleRepository;
|
||||
this.ppomppuBoardFeedStatusRepository = ppomppuBoardFeedStatusRepository;
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public List<PpomppuArticle> filterOnlyNewArticles(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
|
||||
Optional<PpomppuBoardFeedStatus> boardFeedStatus = ppomppuBoardFeedStatusRepository.findByBoardName(boardName);
|
||||
Long latestArticleId = boardFeedStatus.map(PpomppuBoardFeedStatus::getLatestParsedArticleId)
|
||||
.orElse(0L);
|
||||
|
||||
log.info("latestArticleId : {}", latestArticleId);
|
||||
return articles.stream()
|
||||
.filter(e -> e.getArticleId().compareTo(latestArticleId) > 0)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public List<PpomppuArticle> save(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
|
||||
Long latestArticleId = articles.stream()
|
||||
.map(PpomppuArticle::getArticleId)
|
||||
.max(Long::compareTo)
|
||||
.orElse(0L);
|
||||
|
||||
// save PpomppuBoardFeedStatus
|
||||
Optional<PpomppuBoardFeedStatus> boardFeedStatus = ppomppuBoardFeedStatusRepository.findByBoardName(boardName);
|
||||
log.info("boardName: {}, isPresent?: {}", boardName, boardFeedStatus.isPresent());
|
||||
boardFeedStatus.ifPresentOrElse(e -> {
|
||||
if (latestArticleId.longValue() > 0L) {
|
||||
e.updateArticleId(latestArticleId);
|
||||
ppomppuBoardFeedStatusRepository.save(e);
|
||||
}
|
||||
},
|
||||
() -> ppomppuBoardFeedStatusRepository.save(PpomppuBoardFeedStatus.of(boardName,
|
||||
latestArticleId)));
|
||||
|
||||
// save real articles.
|
||||
return ppomppuArticleRepository.saveAll(articles);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
package com.myoa.engineering.crawl.shopping.service;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* PpomppuFeedService
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class PpomppuFeedService {
|
||||
/*
|
||||
private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever;
|
||||
|
||||
public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) {
|
||||
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
|
||||
}
|
||||
|
||||
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
|
||||
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
||||
final Mono<Element> tbody = extractTbodyFromHtml(html);
|
||||
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
||||
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
||||
.map(e -> e.updateBoardName(boardName))
|
||||
// .doOnNext(e -> log.info("parsed Result: {}", e))
|
||||
.collectList();
|
||||
}
|
||||
|
||||
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
|
||||
return html.map(Jsoup::parse)
|
||||
.mapNotNull(e -> e.getElementById("revolution_main_table"))
|
||||
.map(e -> e.getElementsByTag("tbody"))
|
||||
// .doOnNext(e -> log.info("tbody - {}", e.html()))
|
||||
.map(e -> e.stream()
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
|
||||
}
|
||||
|
||||
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
|
||||
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
|
||||
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
|
||||
}
|
||||
|
||||
private PpomppuArticle convertFromElement(Element element) {
|
||||
return PpomppuArticleParser.toArticle(element.getElementsByTag("td"));
|
||||
}
|
||||
|
||||
*/
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.myoa.engineering.crawl.shopping.service;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v2.SubscribedKeyword;
|
||||
import com.myoa.engineering.crawl.shopping.infra.repository.v2.SubscribedKeywordRepository;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Service
|
||||
public class SubscribedKeywordQueryService {
|
||||
|
||||
private final SubscribedKeywordRepository subscribedKeywordRepository;
|
||||
|
||||
public SubscribedKeywordQueryService(SubscribedKeywordRepository subscribedKeywordRepository) {
|
||||
this.subscribedKeywordRepository = subscribedKeywordRepository;
|
||||
}
|
||||
|
||||
public List<SubscribedKeyword> findAll() {
|
||||
return subscribedKeywordRepository.findAll();
|
||||
}
|
||||
|
||||
public List<SubscribedKeyword> findByCrawlTarget(CrawlTarget crawlTarget) {
|
||||
return subscribedKeywordRepository.findByCrawlTarget(crawlTarget);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.myoa.engineering.crawl.shopping.util;
|
||||
|
||||
import java.time.*;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
/**
 * Date/time helpers that interpret bare wall-clock times in the Asia/Seoul zone.
 */
public final class DateTimeUtils {

    private static final DateTimeFormatter FORMATTER_HHMMss = DateTimeFormatter.ofPattern("HH:mm:ss");
    private static final ZoneId ZONE_ASIA_SEOUL = ZoneId.of("Asia/Seoul");

    private DateTimeUtils() {
    }

    /**
     * Resolves an {@code HH:mm:ss} time of day to its most recent occurrence in Asia/Seoul.
     *
     * <p>The time is first placed on today's date (as seen in Asia/Seoul); if that moment is
     * still in the future it is moved back one day, so the result is never after "now".
     *
     * @param HHMMss time of day formatted as {@code HH:mm:ss}; may be {@code null}
     * @return the resolved date-time in Asia/Seoul, or {@code null} when the input is
     *         {@code null} or cannot be parsed
     */
    public static ZonedDateTime parse(String HHMMss) {
        if (HHMMss == null) {
            return null;
        }
        try {
            final LocalTime time = LocalTime.parse(HHMMss, FORMATTER_HHMMss);
            // Read the clock once, and in the same zone the result is stamped with. Using the
            // JVM default zone here would shift the date by a day on hosts not running in KST.
            final LocalDateTime now = LocalDateTime.now(ZONE_ASIA_SEOUL);
            LocalDateTime dateTime = LocalDateTime.of(now.toLocalDate(), time);
            if (dateTime.isAfter(now)) {
                dateTime = dateTime.minusDays(1);
            }
            return dateTime.atZone(ZONE_ASIA_SEOUL);
        } catch (DateTimeException e) {
            // Unparsable input is reported as "no value", matching the previous contract.
            return null;
        }
    }

}
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.myoa.engineering.crawl.shopping.util;
|
||||
|
||||
/**
 * Lenient numeric parsing helpers that fall back to a default instead of throwing.
 */
public final class NumberUtils {

    private NumberUtils() {
    }

    /**
     * Parses a decimal integer.
     *
     * @param value text to parse
     * @return the parsed value, or {@code null} when {@code value} is not a valid integer
     */
    public static Integer parseInt(String value) {
        return parseInt(value, null);
    }

    /**
     * Parses a decimal integer, falling back to a caller-supplied default.
     *
     * @param value        text to parse
     * @param defaultValue value returned when parsing fails
     * @return the parsed value, or {@code defaultValue} on {@link NumberFormatException}
     */
    public static Integer parseInt(String value, Integer defaultValue) {
        Integer parsed;
        try {
            parsed = Integer.valueOf(value);
        } catch (NumberFormatException notANumber) {
            parsed = defaultValue;
        }
        return parsed;
    }

    /**
     * Parses a decimal long.
     *
     * @param value text to parse
     * @return the parsed value, or {@code null} when {@code value} is not a valid long
     */
    public static Long parseLong(String value) {
        return parseLong(value, null);
    }

    /**
     * Parses a decimal long, falling back to a caller-supplied default.
     *
     * @param value        text to parse
     * @param defaultValue value returned when parsing fails
     * @return the parsed value, or {@code defaultValue} on {@link NumberFormatException}
     */
    public static Long parseLong(String value, Long defaultValue) {
        Long parsed;
        try {
            parsed = Long.valueOf(value);
        } catch (NumberFormatException notANumber) {
            parsed = defaultValue;
        }
        return parsed;
    }

}
|
||||
@@ -0,0 +1,62 @@
|
||||
package com.myoa.engineering.crawl.shopping.util;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.core.json.JsonReadFeature;
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.SerializationFeature;
|
||||
import com.fasterxml.jackson.databind.json.JsonMapper;
|
||||
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import com.fasterxml.jackson.module.paramnames.ParameterNamesModule;
|
||||
|
||||
public final class ObjectMapperFactory {
|
||||
|
||||
public static final ObjectMapper DEFAULT_MAPPER;
|
||||
public static final ObjectMapper REDIS_MAPPER;
|
||||
@Deprecated
|
||||
public static final ObjectMapper LOGGING_MAPPER;
|
||||
|
||||
static {
|
||||
DEFAULT_MAPPER = initDefaultMapper();
|
||||
REDIS_MAPPER = initRedisMapper();
|
||||
LOGGING_MAPPER = JsonMapper.builder()
|
||||
.serializationInclusion(JsonInclude.Include.NON_EMPTY)
|
||||
.build();
|
||||
}
|
||||
|
||||
private ObjectMapperFactory() {
|
||||
}
|
||||
|
||||
private static ObjectMapper initDefaultMapper() {
|
||||
return JsonMapper.builder()
|
||||
.configure(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES.mappedFeature(), true)
|
||||
.configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS.mappedFeature(), true)
|
||||
.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
|
||||
.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS)
|
||||
.disable(SerializationFeature.FAIL_ON_UNWRAPPED_TYPE_IDENTIFIERS)
|
||||
.enable(DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL)
|
||||
.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS)
|
||||
.serializationInclusion(JsonInclude.Include.NON_NULL)
|
||||
.build()
|
||||
.registerModule(new ParameterNamesModule())
|
||||
.registerModule(new Jdk8Module())
|
||||
.registerModule(new JavaTimeModule());
|
||||
}
|
||||
|
||||
private static ObjectMapper initRedisMapper() {
|
||||
return JsonMapper.builder()
|
||||
.configure(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES.mappedFeature(), true)
|
||||
.configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS.mappedFeature(), true)
|
||||
.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
|
||||
.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS)
|
||||
.disable(SerializationFeature.FAIL_ON_UNWRAPPED_TYPE_IDENTIFIERS)
|
||||
.enable(DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL)
|
||||
.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS)
|
||||
.serializationInclusion(JsonInclude.Include.NON_NULL)
|
||||
.build()
|
||||
.registerModule(new ParameterNamesModule())
|
||||
.registerModule(new Jdk8Module())
|
||||
.registerModule(new JavaTimeModule());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2019 LINE Corporation. All rights reserved.
|
||||
* LINE Corporation PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
|
||||
*/
|
||||
|
||||
package com.myoa.engineering.crawl.shopping.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * ObjectUtils
 *
 * <p>Null and emptiness checks for single objects, varargs and collections.
 *
 * @author Shin Woo-jin (lp12254@linecorp.com)
 * @since 2019-10-28
 */
public final class ObjectUtils {

    private ObjectUtils() {
    }

    /**
     * Check if given object is null.
     * <code>
     * e == object == > false e == null == > true
     * </code>
     *
     * @param e   Target object
     * @param <E> Unfixed specific type. If you want to restrict to a specific interface, copy and extend the qualifier.
     * @return {@code true} when the given object is null
     */
    public static <E> boolean isNullObject(final E e) {
        return e == null;
    }

    /**
     * Check if given object is not null.
     * <code>
     * e == object == > true e == null == > false
     * </code>
     *
     * @param e   Target object
     * @param <E> Unfixed specific type. If you want to restrict to a specific interface, copy and extend the qualifier.
     * @return {@code true} when the given object is not null
     */
    public static <E> boolean isNotEmpty(final E e) {
        return !isNullObject(e);
    }

    /**
     * Check if there is any null object in given objects.
     * <code>
     * args == object = > false args == object, object = > false args == null, null, object = > true args
     * == null = > true args == null, null = > true
     * </code>
     *
     * @param args Objects to check; a null array itself counts as "has null".
     * @return {@code true} when the array is null or contains at least one null element
     */
    public static boolean hasNullObject(Object... args) {
        // A bare `null` argument arrives as a null array; treat it as documented instead of throwing NPE.
        return args == null || Arrays.stream(args).anyMatch(ObjectUtils::isNullObject);
    }

    /**
     * Check that none of the given objects is null.
     * <code>
     * args == object = > true args == object, object = > true args == null, null, object = > false args
     * == null = > false args == null, null = > false
     * </code>
     *
     * @param args Objects to check; a null array itself counts as "not all present".
     * @return {@code true} when the array is non-null and contains no null element
     */
    public static boolean hasAllObject(Object... args) {
        return args != null && Arrays.stream(args).noneMatch(ObjectUtils::isNullObject);
    }

    /**
     * Check if all of the given objects are null.
     * <code>
     * args == object = > false args == object, object = > false args == null, null, object = > false args
     * == null = > true args == null, null = > true
     * </code>
     *
     * @param args Objects to check; a null array itself counts as "all null".
     * @return {@code true} when the array is null or every element is null
     */
    public static boolean hasAllNullObjects(final Object... args) {
        return args == null || Arrays.stream(args).allMatch(ObjectUtils::isNullObject);
    }

    /**
     * Check if given collection object is null or an empty collection.
     * <code>
     * e == null = > true e == emptyCollection = > true e == hasElement = > false
     * </code>
     *
     * @param e   Collection to check
     * @param <E> Collection type
     * @return {@code true} when the collection is null or empty
     */
    public static <E extends Collection<?>> boolean isNullOrEmptyCollection(final E e) {
        return e == null || e.isEmpty();
    }

    /**
     * Get the collection's size, treating a null collection as size 0.
     *
     * @param e   Collection to measure
     * @param <E> Collection type
     * @return the collection's size, or 0 when it is null or empty
     */
    public static <E extends Collection<?>> int getCollectionSize(final E e) {
        if (isNullOrEmptyCollection(e)) {
            return 0;
        }

        return e.size();
    }

}
|
||||
@@ -0,0 +1,12 @@
|
||||
spring:
|
||||
config:
|
||||
activate:
|
||||
on-profile: development
|
||||
# import:
|
||||
# - "configserver:http://192.168.0.100:20085"
|
||||
|
||||
|
||||
server:
|
||||
port: 20080
|
||||
|
||||
# import: optional:configserver:http://localhost:11080 # lets the app start up even if the config server is not found.
|
||||
12
shopping-crawler/src/main/resources/application-local.yml
Normal file
12
shopping-crawler/src/main/resources/application-local.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
spring:
|
||||
config:
|
||||
activate:
|
||||
on-profile: local
|
||||
import:
|
||||
- classpath:/datasource/local.yml
|
||||
- classpath:/slack/local.yml
|
||||
|
||||
server:
|
||||
port: 20080
|
||||
|
||||
# import: optional:configserver:http://localhost:11080 # lets the app start up even if the config server is not found.
|
||||
@@ -0,0 +1,6 @@
|
||||
spring:
|
||||
config:
|
||||
activate:
|
||||
on-profile: production
|
||||
import:
|
||||
- "configserver:http://ppn-config-server:20080"
|
||||
28
shopping-crawler/src/main/resources/application.yml
Normal file
28
shopping-crawler/src/main/resources/application.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
spring:
|
||||
application:
|
||||
name: crawler-shopping
|
||||
main:
|
||||
allow-bean-definition-overriding: true
|
||||
profiles:
|
||||
active: ${SPRING_ACTIVE_PROFILE:local}
|
||||
group:
|
||||
local: "local,datasource-local,webclient-local"
|
||||
development: "development,datasource-development,webclient-development"
|
||||
production: "production, datasource-production,webclient-production"
|
||||
freemarker:
|
||||
enabled: false
|
||||
cloud:
|
||||
config:
|
||||
enabled: false
|
||||
|
||||
server:
|
||||
port: 20080
|
||||
error:
|
||||
whitelabel:
|
||||
enabled: false
|
||||
|
||||
management:
|
||||
endpoints:
|
||||
web:
|
||||
exposure:
|
||||
include: refresh
|
||||
15
shopping-crawler/src/main/resources/logback-spring.xml
Normal file
15
shopping-crawler/src/main/resources/logback-spring.xml
Normal file
@@ -0,0 +1,15 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<springProperty name="DEFAULT_LEVEL_CONFIG" source="log.defaultLevel" />
|
||||
<springProfile name="local">
|
||||
<include resource="logback/logback-development.xml" />
|
||||
<logger name="org.apache.kafka" level="INFO" />
|
||||
</springProfile>
|
||||
<springProfile name="development">
|
||||
<include resource="logback/logback-development.xml" />
|
||||
<logger name="org.apache.kafka" level="INFO" />
|
||||
</springProfile>
|
||||
<springProfile name="production">
|
||||
<include resource="logback/logback-production.xml" />
|
||||
</springProfile>
|
||||
</configuration>
|
||||
@@ -0,0 +1,23 @@
|
||||
<included>
|
||||
<property name="FILE_LOG_PATTERN"
|
||||
value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{45}:%L - %msg%n" />
|
||||
<property name="LOG_FILE_BASE" value="lcp-benefit-benefit-api" />
|
||||
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>${DIRECTORY}/${LOG_FILE_BASE}_log</file>
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||
<fileNamePattern>${DIRECTORY}/${LOG_FILE_BASE}_log.%d{yyyyMMdd}.%i</fileNamePattern>
|
||||
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
|
||||
<maxFileSize>1000MB</maxFileSize>
|
||||
</timeBasedFileNamingAndTriggeringPolicy>
|
||||
<maxHistory>60</maxHistory>
|
||||
</rollingPolicy>
|
||||
<encoder>
|
||||
<pattern>${FILE_LOG_PATTERN}</pattern>
|
||||
<immediateFlush>${IMMEDIATE_FLUSH}</immediateFlush>
|
||||
</encoder>
|
||||
</appender>
|
||||
<appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender">
|
||||
<queueSize>1024</queueSize>
|
||||
<appender-ref ref="FILE" />
|
||||
</appender>
|
||||
</included>
|
||||
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<included>
|
||||
<!-- =========== property BETA ========= -->
|
||||
<property name="DEFAULT_LEVEL" value="${DEFAULT_LEVEL_CONFIG:-INFO}"/>
|
||||
<!--file-->
|
||||
<property name="DIRECTORY" value="/home1/www/logs/supervisor"/>
|
||||
<property name="IMMEDIATE_FLUSH" value="true"/>
|
||||
<!--nelo2-->
|
||||
<property name="NELO2_LEVEL" value="WARN"/>
|
||||
<!-- =========== include appender =========== -->
|
||||
<include resource="org/springframework/boot/logging/logback/defaults.xml"/>
|
||||
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
|
||||
<include resource="logback/component/logback-nelo2.xml"/>
|
||||
<include resource="logback/component/logback-datachain.xml"/>
|
||||
<!-- =========== root logger ============== -->
|
||||
<root level="${DEFAULT_LEVEL}">
|
||||
<appender-ref ref="CONSOLE"/>
|
||||
</root>
|
||||
</included>
|
||||
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<included>
|
||||
<!-- =========== property RELEASE ========= -->
|
||||
<property name="DEFAULT_LEVEL" value="${DEFAULT_LEVEL_CONFIG:-INFO}"/>
|
||||
<!--file-->
|
||||
<property name="DIRECTORY" value="/home1/www/logs/supervisor"/>
|
||||
<property name="IMMEDIATE_FLUSH" value="true"/>
|
||||
<!--nelo2-->
|
||||
<property name="NELO2_LEVEL" value="WARN"/>
|
||||
<!-- =========== include appender =========== -->
|
||||
<include resource="org/springframework/boot/logging/logback/defaults.xml"/>
|
||||
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
|
||||
<include resource="logback/component/logback-nelo2.xml"/>
|
||||
<include resource="logback/component/logback-datachain.xml"/>
|
||||
<!-- =========== root logger ============== -->
|
||||
<root level="${DEFAULT_LEVEL}">
|
||||
<appender-ref ref="CONSOLE"/>
|
||||
</root>
|
||||
</included>
|
||||
@@ -0,0 +1,118 @@
|
||||
package com.myoa.engineering.crawl.shopping.crawlhandler.parser;
|
||||
|
||||
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
|
||||
import com.myoa.engineering.crawl.shopping.util.DateTimeUtils;
|
||||
import com.myoa.engineering.crawl.shopping.util.NumberUtils;
|
||||
import com.myoa.engineering.crawl.shopping.util.TestDataUtils;
|
||||
import io.micrometer.core.instrument.util.StringUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
class PpomppuArticleParserV2Test {
|
||||
|
||||
@Test
|
||||
void test1() {
|
||||
String data = TestDataUtils.fileToString("testdata/zboard/file1.html");
|
||||
Document document = Jsoup.parse(data);
|
||||
Elements trList = document.getElementById("revolution_main_table").getElementsByTag("tr");
|
||||
|
||||
List<PpomppuArticle> articles = trList.stream()
|
||||
.filter(this::isRealArticle)
|
||||
.map(this::parse)
|
||||
.toList();
|
||||
System.out.println(articles.size());
|
||||
System.out.println(articles.get(0));
|
||||
System.out.println(articles.get(1));
|
||||
}
|
||||
|
||||
private boolean isRealArticle(Element tr) {
|
||||
Elements tdList = tr.getElementsByTag("td");
|
||||
if (tdList.size() != 6) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hasOnlyNumeric(tdList.get(0))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Pattern pattern_numeric = Pattern.compile("\\d+");
|
||||
|
||||
private boolean hasOnlyNumeric(Element td) {
|
||||
return pattern_numeric.matcher(td.text()).matches();
|
||||
}
|
||||
|
||||
public PpomppuArticle parse(Element tr) {
|
||||
Elements tdList = tr.getElementsByTag("td");
|
||||
Long articleId = Long.parseLong(tdList.get(0).text());
|
||||
|
||||
String title = tdList.get(2).text();
|
||||
String articleUrl = parseArticleUrl(tdList.get(2).getElementsByTag("a").attr("href"));
|
||||
PpomppuBoardName boardName = parseBoardName(title);
|
||||
Integer recommended = parseRecommended(tdList.get(4));
|
||||
Integer hit = NumberUtils.parseInt(tdList.get(5).text(), 0);
|
||||
|
||||
|
||||
ZonedDateTime registeredAt = DateTimeUtils.parse(tdList.get(3).text());
|
||||
|
||||
return PpomppuArticle.builder()
|
||||
.articleId(articleId)
|
||||
.title(title)
|
||||
.boardName(boardName)
|
||||
.articleUrl(articleUrl)
|
||||
.recommended(recommended)
|
||||
.hit(hit)
|
||||
.registeredAt(registeredAt)
|
||||
.build();
|
||||
}
|
||||
|
||||
public Long parseArticleId(Element td) {
|
||||
return Long.parseLong(td.text().trim());
|
||||
}
|
||||
|
||||
public static Integer parseRecommended(Element td) {
|
||||
final String voteString = td.text();
|
||||
if (StringUtils.isEmpty(voteString)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final int voteUp = Integer.parseInt(td.text().split(" - ")[0]);
|
||||
final int voteDown = Integer.parseInt(td.text().split(" - ")[1]);
|
||||
int recommended = voteUp - voteDown;
|
||||
return recommended;
|
||||
}
|
||||
|
||||
public static String parseArticleUrl(String data) {
|
||||
return PpomppuBoardName.ofViewPageUrl(data);
|
||||
}
|
||||
|
||||
|
||||
Pattern patternBoardName = Pattern.compile("\\[(.+?)\\]");
|
||||
|
||||
public PpomppuBoardName parseBoardName(String fullTitle) {
|
||||
Matcher matcher = patternBoardName.matcher(fullTitle);
|
||||
String lastMatched = null;
|
||||
while (matcher.find()) {
|
||||
lastMatched = matcher.group(1);
|
||||
}
|
||||
return PpomppuBoardName.ofBoardName(lastMatched, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
void test2() {
|
||||
PpomppuBoardName boardName = parseBoardName("[자사몰]푸마 메쉬 폼스트라이프 [에디션] 5종 [세트] (18,220원/무료)6 [의류/잡화]");
|
||||
System.out.println(boardName);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
package com.myoa.engineering.crawl.shopping.event.handler;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import org.ahocorasick.trie.Emit;
|
||||
import org.ahocorasick.trie.Trie;
|
||||
import org.jeasy.random.EasyRandom;
|
||||
import org.jeasy.random.EasyRandomParameters;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
class ArticleUpsertEventListenerTest {
|
||||
|
||||
@Test
|
||||
public void test1() {
|
||||
|
||||
List<String> baseData = List.of(
|
||||
"[공홈]베베숲 시그니처 위드 블루 20팩 (25,990원/무료)2 [기타]",
|
||||
"[현대H몰]10주년 스페셜 에디션 봉고데기 40mm 세트 (67,640원/무료)3 [가전/가구]",
|
||||
"[SSG]필립스 면도기 칫솔 기획전( 437,000원~/무료)1 [기타]",
|
||||
"(티몬)제스프리 골드키위 중대과 1.8kg내외 (카페,토페 14,823원/무배)3 [식품/건강]",
|
||||
"[공식몰]연세생활건강 당뇨영양식 24팩 (33,500원/유배)1 [식품/건강]",
|
||||
"[인터파크]크리스탈라이트 아이스티 에이드 온더고 60개입 멀티팩(17,800원/무료)6 [기타]",
|
||||
"[인팍쇼핑]샤카웨어 액티브 맥스 헤비웨이트 10종 택 1 (7,350원/무료)7 [의류/잡화]"
|
||||
);
|
||||
|
||||
Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeyword("블루")
|
||||
.addKeyword("봉고")
|
||||
.build();
|
||||
|
||||
List<Collection<Emit>> list = baseData.stream()
|
||||
.map(trie::parseText)
|
||||
.toList();
|
||||
|
||||
System.out.println(list);
|
||||
}
|
||||
|
||||
@Test
|
||||
void test2() throws InterruptedException {
|
||||
EasyRandomParameters paramsKeyword = new EasyRandomParameters()
|
||||
.stringLengthRange(2, 10);
|
||||
EasyRandom GEN_KEYWORD = new EasyRandom(paramsKeyword);
|
||||
|
||||
EasyRandomParameters paramsSentence = new EasyRandomParameters()
|
||||
.stringLengthRange(100, 1000);
|
||||
EasyRandom GEN_SENTENCE = new EasyRandom(paramsSentence);
|
||||
|
||||
Trie.TrieBuilder trieBuilder = Trie.builder().ignoreOverlaps();
|
||||
|
||||
GEN_KEYWORD.objects(TestA.class, 1000)
|
||||
.map(TestA::getFieldA)
|
||||
.forEach(trieBuilder::addKeyword);
|
||||
Trie trie = trieBuilder.build();
|
||||
|
||||
List<Collection<Emit>> listOfEmits = GEN_SENTENCE.objects(TestA.class, 2000)
|
||||
.map(TestA::getFieldA)
|
||||
.map(trie::parseText)
|
||||
.toList();
|
||||
|
||||
for(Collection<Emit> emits : listOfEmits) {
|
||||
System.out.println(emits);
|
||||
System.out.println("--------------------------");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class TestA {
|
||||
private String fieldA;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
package com.myoa.engineering.crawl.shopping.util;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.springframework.lang.NonNull;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import java.util.stream.Collectors;
|
||||
|
||||
public final class TestDataUtils {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER;
|
||||
|
||||
static {
|
||||
OBJECT_MAPPER = ObjectMapperFactory.DEFAULT_MAPPER;
|
||||
}
|
||||
|
||||
private TestDataUtils() {
|
||||
}
|
||||
|
||||
public static <T> T inputStreamToObject(@NonNull String resourcePath,
|
||||
@NonNull Class<T> clazz) throws IOException {
|
||||
return OBJECT_MAPPER.readValue(fileToString(resourcePath), clazz);
|
||||
}
|
||||
|
||||
public static <T> List<T> inputStreamsToList(@NonNull Class<T> clazz,
|
||||
@NonNull String... resourcePath) throws IOException {
|
||||
return inputStreamsToList(Arrays.asList(resourcePath), clazz);
|
||||
}
|
||||
|
||||
public static <T> List<T> inputStreamsToList(@NonNull List<String> resourcePath,
|
||||
@NonNull Class<T> clazz) throws IOException {
|
||||
return resourcePath.stream()
|
||||
.map(e -> {
|
||||
try {
|
||||
return OBJECT_MAPPER.readValue(fileToString(e), clazz);
|
||||
} catch (JsonProcessingException jsonProcessingException) {
|
||||
jsonProcessingException.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static <T> List<T> inputStreamToList(@NonNull String resourcePath,
|
||||
@NonNull Class<T> clazz) throws IOException {
|
||||
return (List<T>) OBJECT_MAPPER.readValue(
|
||||
fileToString(resourcePath),
|
||||
OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, clazz));
|
||||
}
|
||||
|
||||
public static String fileToString(@NonNull String resourcePath) {
|
||||
|
||||
final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
|
||||
final Scanner s = new Scanner(classLoader.getResourceAsStream(resourcePath)).useDelimiter("\\A");
|
||||
return s.hasNext() ? s.next() : "";
|
||||
}
|
||||
|
||||
}
|
||||
12
shopping-crawler/src/test/resources/logback-development.xml
Normal file
12
shopping-crawler/src/test/resources/logback-development.xml
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<included>
|
||||
<!-- =========== property BETA ========= -->
|
||||
<property name="DEFAULT_LEVEL" value="${DEFAULT_LEVEL_CONFIG:-INFO}"/>
|
||||
<!-- =========== include appender =========== -->
|
||||
<include resource="org/springframework/boot/logging/logback/defaults.xml"/>
|
||||
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
|
||||
<!-- =========== root logger ============== -->
|
||||
<root level="${DEFAULT_LEVEL}">
|
||||
<appender-ref ref="CONSOLE"/>
|
||||
</root>
|
||||
</included>
|
||||
6
shopping-crawler/src/test/resources/logback-spring.xml
Normal file
6
shopping-crawler/src/test/resources/logback-spring.xml
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<springProperty name="DEFAULT_LEVEL_CONFIG" source="log.defaultLevel"/>
|
||||
|
||||
<include resource="logback-development.xml"/>
|
||||
</configuration>
|
||||
1100
shopping-crawler/src/test/resources/testdata/zboard/file1.html
vendored
Normal file
1100
shopping-crawler/src/test/resources/testdata/zboard/file1.html
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user