Merge pull request '[PPN-210926] Persist feed articles' (#8) from feature/PPN-210926-5 into develop

Reviewed-on: OutworldDestroyer/PpomppuNotifier#8
This commit is contained in:
nthfuncx 2021-09-26 22:25:36 +09:00
commit 22ac349d26
34 changed files with 747 additions and 59 deletions

46
PpomppuNotifier_DB.sql Normal file
View File

@ -0,0 +1,46 @@
-- Reference schema for the PpomppuNotifier processor entities.
-- Each statement is terminated with ';' so the script can be executed as a batch.
-- "board_name" is varchar because the entities map the enum with EnumType.STRING
-- (the constant name is stored, not its ordinal).
create table "ppomppu_article"
(
    "id" bigint generated by default as identity,
    "article_id" bigint,
    "article_url" varchar(255),
    "board_name" varchar(255),
    "hit" integer,
    "recommended" integer,
    "registered_at" timestamp,
    "title" varchar(255),
    primary key ("id")
);

create table "ppomppu_board_feed_status"
(
    "id" bigint generated by default as identity,
    "board_name" varchar(255),
    "latest_parsed_article_id" bigint,
    "updated_at" timestamp,
    primary key ("id")
);

create table "published_history"
(
    "id" bigint generated by default as identity,
    "board_name_list" varchar(255),
    "published_at" timestamp,
    "user_id" bigint,
    primary key ("id")
);

create table "subscribed_board"
(
    "id" bigint generated by default as identity,
    "board_name" varchar(255),
    "user_id" bigint,
    primary key ("id")
);

create table "subscribed_user"
(
    "id" bigint generated by default as identity,
    "registered_at" timestamp,
    "user_id" bigint,
    primary key ("id")
);

54
PpomppuNotifier_ERD.puml Normal file
View File

@ -0,0 +1,54 @@
@startuml
'https://plantuml.com/class-diagram
'Entity overview of the processor module; one class per persisted table.

class SubscribedUser {
    - id
    + user_id
    + registered_at
    + created_at
    + modified_at
}

class SubscribedBoard {
    - id
    - user_id
    + board_name
    + created_at
    + modified_at
}

class PublishedHistory {
    - id
    + user_id
    + board_name_list
    + published_at
    + created_at
    + modified_at
}

class PpomppuArticle {
    - id
    + article_id
    + board_name
    + article_url
    + title
    + recommended
    + hit
    + registered_at
    + created_at
    + modified_at
}

class PpomppuBoardFeedStatus {
    - id
    + board_name
    + latest_parsed_article_id
    + updated_at
    + created_at
    + modified_at
}

'A subscribed user owns zero or more board subscriptions and publish-history rows.
SubscribedUser --o{ SubscribedBoard
SubscribedUser --o{ PublishedHistory
@enduml

View File

@ -8,8 +8,10 @@ dependencies {
// https://projectreactor.io/docs/core/release/reference/#debug-activate // https://projectreactor.io/docs/core/release/reference/#debug-activate
implementation 'org.springframework.boot:spring-boot-starter-webflux' implementation 'org.springframework.boot:spring-boot-starter-webflux'
implementation 'org.springframework.boot:spring-boot-starter-data-jpa' implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
implementation 'org.springframework.boot:spring-boot-starter-jdbc'
implementation 'com.rometools:rome:1.16.0' implementation 'com.rometools:rome:1.16.0'
implementation 'org.jsoup:jsoup:1.14.2' implementation 'org.jsoup:jsoup:1.14.2'
implementation 'com.h2database:h2:1.4.200'
annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor' annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor'
annotationProcessor 'org.projectlombok:lombok' annotationProcessor 'org.projectlombok:lombok'

View File

@ -0,0 +1,35 @@
package com.myoa.engineering.crawl.ppomppu.processor.configuration;
import java.sql.SQLException;
import lombok.extern.slf4j.Slf4j;
import org.h2.tools.Server;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Profile;
import org.springframework.context.event.ContextClosedEvent;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.context.event.EventListener;
/**
 * Starts the H2 web console alongside the application when the {@code development}
 * profile is active, and stops it again when the context closes.
 */
@Slf4j
@Profile("development")
@Configuration
public class H2ConsoleConfiguration {

    /** Running console server, or {@code null} while the console is not started. */
    private Server webServer;

    @Value("${spring.h2.console.port}")
    private String port;

    /**
     * Starts the console on the configured port.
     *
     * <p>The refresh event can be delivered more than once (for example from a child
     * context), so a console that is already running is left untouched instead of
     * trying to bind the same port again.
     *
     * @throws SQLException if the H2 server cannot be started
     */
    @EventListener(ContextRefreshedEvent.class)
    public void start() throws SQLException {
        if (webServer != null) {
            log.debug("h2 console already started");
            return;
        }
        log.info("starting h2 console");
        // NOTE(review): "-tcpAllowOthers" is an option of the H2 TCP server; the web console
        // equivalent appears to be "-webAllowOthers". Left unchanged because switching it
        // would open the console to remote hosts - confirm the intended exposure.
        this.webServer = Server.createWebServer("-webPort", port, "-tcpAllowOthers").start();
    }

    /**
     * Stops the console if it was started; a no-op when startup never happened or failed.
     */
    @EventListener(ContextClosedEvent.class)
    public void stop() {
        if (webServer == null) {
            return;
        }
        log.info("stopping h2 console");
        webServer.stop();
        webServer = null;
    }
}

View File

@ -0,0 +1,124 @@
package com.myoa.engineering.crawl.ppomppu.processor.configuration;
import com.myoa.engineering.crawl.ppomppu.processor.configuration.properties.DatasourceProperties;
import com.myoa.engineering.crawl.ppomppu.processor.configuration.properties.DatasourceProperties.DataSourcePropertiesUnit;
import com.myoa.engineering.crawl.ppomppu.processor.configuration.properties.HibernateProperties;
import com.myoa.engineering.crawl.ppomppu.processor.configuration.properties.HikariProperties;
import com.myoa.engineering.crawl.ppomppu.processor.domain.BaseScanDomain;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository.BaseScanRepository;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import javax.persistence.EntityManagerFactory;
import javax.sql.DataSource;
import lombok.NonNull;
import org.hibernate.cfg.AvailableSettings;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
import org.springframework.orm.jpa.JpaTransactionManager;
import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean;
import org.springframework.transaction.PlatformTransactionManager;
@Configuration
@EnableJpaRepositories(basePackageClasses = BaseScanRepository.class,
entityManagerFactoryRef = "ppomppuNotifierProcessorEntityManagerFactory",
transactionManagerRef = "ppomppuNotifierProcessorTransactionManager"
)
public class PpomppuDatasourceConfiguration {
private static final String DATA_SOURCE_UNIT_NAME = "ppomppu";
private final DatasourceProperties dataSourceProeprties;
private final HikariProperties hikariProperties;
private final HibernateProperties hibernateProperties;
public PpomppuDatasourceConfiguration(DatasourceProperties dataSourceProeprties,
HikariProperties hikariProperties,
HibernateProperties hibernateProperties) {
this.dataSourceProeprties = dataSourceProeprties;
this.hikariProperties = hikariProperties;
this.hibernateProperties = hibernateProperties;
}
@Bean(name = "ppomppuNotifierProcessorDataSource")
public DataSource dataSource() {
DataSourcePropertiesUnit dataSourcePropertiesUnit = dataSourceProeprties.find(DATA_SOURCE_UNIT_NAME);
final HikariConfig hikariConfig = new HikariConfig();
hikariConfig.setJdbcUrl(dataSourcePropertiesUnit.toCompletedJdbcUrl());
hikariConfig.setUsername("sa");
hikariConfig.setPassword("sa");
hikariConfig.setAutoCommit(hikariProperties.getAutoCommit());
hikariConfig.setMaximumPoolSize(hikariProperties.getMaximumPoolSize());
hikariConfig.setMinimumIdle(hikariProperties.getMinimumIdle());
if (hikariProperties.getMaximumPoolSize() > hikariProperties.getMinimumIdle()) {
hikariConfig.setIdleTimeout(hikariProperties.getIdleTimeout());
}
hikariConfig.setValidationTimeout(hikariProperties.getValidationTimeout());
hikariConfig.setConnectionTimeout(hikariProperties.getConnectionTimeout());
hikariConfig.setMaxLifetime(hikariProperties.getMaxLifetime());
final DataSource dataSource = new HikariDataSource(hikariConfig);
return dataSource;
}
@Bean("ppomppuNotifierProcessorEntityManagerFactory")
public LocalContainerEntityManagerFactoryBean entityManagerFactory(
EntityManagerFactoryBuilder builder,
@Qualifier("ppomppuNotifierProcessorDataSource") DataSource dataSource) {
return builder.dataSource(dataSource)
.packages(BaseScanDomain.class)
.properties(getPropsMap(DATA_SOURCE_UNIT_NAME))
.build();
}
@Bean("ppomppuNotifierProcessorTransactionManager")
public PlatformTransactionManager transactionManager(
@Qualifier("ppomppuNotifierProcessorEntityManagerFactory") EntityManagerFactory entityManagerFactory) {
return new JpaTransactionManager(entityManagerFactory);
}
public static Properties getProps(@NonNull HibernateProperties.HibernatePropertiesUnit hibernateProperties) {
Properties properties = new Properties();
properties.put(AvailableSettings.DIALECT, hibernateProperties.getDialect());
properties.put(AvailableSettings.FORMAT_SQL, hibernateProperties.getFormatSql());
properties.put(AvailableSettings.SHOW_SQL, hibernateProperties.getShowSql());
properties.put(AvailableSettings.HBM2DDL_AUTO, hibernateProperties.getHbm2ddlAuto());
properties.put(AvailableSettings.CONNECTION_PROVIDER_DISABLES_AUTOCOMMIT,
hibernateProperties.getDisableAutoCommit());
properties.put(AvailableSettings.IMPLICIT_NAMING_STRATEGY,
"org.springframework.boot.orm.jpa.hibernate.SpringImplicitNamingStrategy");
properties.put(AvailableSettings.PHYSICAL_NAMING_STRATEGY,
"org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy");
properties.put(AvailableSettings.GENERATE_STATISTICS, "false");
properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS, "true");
properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS_SKIP_COLUMN_DEFINITIONS, "true");
properties.put(AvailableSettings.STATEMENT_BATCH_SIZE, "20");
properties.put(AvailableSettings.ORDER_INSERTS, "true");
properties.put(AvailableSettings.ORDER_UPDATES, "true");
properties.put(AvailableSettings.BATCH_VERSIONED_DATA, "true");
properties.put(AvailableSettings.USE_NEW_ID_GENERATOR_MAPPINGS, "false");
return properties;
}
public Map<String, String> getPropsMap(@NonNull String unitName) {
return convertPropertiestoMaps(getProps(hibernateProperties.find(unitName)));
}
public Map<String, String> convertPropertiestoMaps(Properties properties) {
Map<String, String> propertiesMap = new HashMap<>();
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements(); ) {
String key = (String) e.nextElement();
propertiesMap.put(key, properties.getProperty(key));
}
return propertiesMap;
}
}

View File

@ -0,0 +1,44 @@
package com.myoa.engineering.crawl.ppomppu.processor.configuration.properties;
import com.myoa.engineering.crawl.ppomppu.support.util.ObjectUtil;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Data source definitions bound from the {@code datasource} configuration prefix,
 * one {@link DataSourcePropertiesUnit} per named unit.
 */
@Component
@Setter
@Getter
@ConfigurationProperties(prefix = "datasource")
public class DatasourceProperties {

    private List<DataSourcePropertiesUnit> units;

    /** Connection settings of a single named data source. */
    @Getter
    @Setter
    public static class DataSourcePropertiesUnit {

        private String unitName;
        private String schemaName;
        private String connectionParameters;
        private String dbConnectionUrl;
        private Boolean simpleConnectionUrl;

        /**
         * Builds the JDBC URL for this unit.
         *
         * <p>When {@code simpleConnectionUrl} is {@code true} the configured URL is used as is.
         * Otherwise the schema name is appended, followed by {@code ?<connectionParameters>}
         * only when parameters are configured, so an unset value no longer yields
         * a literal {@code ?null} suffix.
         *
         * @return the JDBC URL to hand to the connection pool
         */
        public String toCompletedJdbcUrl() {
            if (Boolean.TRUE.equals(simpleConnectionUrl)) {
                return dbConnectionUrl;
            }
            final String urlWithSchema = String.format("%s/%s", dbConnectionUrl, schemaName);
            if (connectionParameters == null || connectionParameters.isEmpty()) {
                return urlWithSchema;
            }
            return urlWithSchema + "?" + connectionParameters;
        }
    }

    /**
     * Looks up a configured unit by name.
     *
     * @param unitName name of the unit to find
     * @return the first unit whose name equals {@code unitName}
     * @throws IllegalArgumentException if no units are configured or none matches
     */
    public DataSourcePropertiesUnit find(String unitName) {
        if (units != null) {
            for (DataSourcePropertiesUnit unit : units) {
                // Null-safe on both sides: a unit without a name simply never matches.
                if (unit != null && unitName != null && unitName.equals(unit.getUnitName())) {
                    return unit;
                }
            }
        }
        throw new IllegalArgumentException(this.getClass().getName() + ": unitName Not found. " + unitName);
    }
}

View File

@ -0,0 +1,38 @@
package com.myoa.engineering.crawl.ppomppu.processor.configuration.properties;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Hibernate settings bound from the {@code hibernate} configuration prefix,
 * one {@link HibernatePropertiesUnit} per named unit.
 */
@Component
@Setter
@Getter
@ConfigurationProperties(prefix = "hibernate")
public class HibernateProperties {

    private List<HibernatePropertiesUnit> units;

    /** Hibernate settings of a single named unit; all values are kept as raw strings. */
    @Getter
    @Setter
    public static class HibernatePropertiesUnit {

        private String unitName;
        private String dialect;
        private String formatSql;
        private String showSql;
        private String hbm2ddlAuto;
        private String disableAutoCommit;
    }

    /**
     * Looks up a configured unit by name.
     *
     * @param unitName name of the unit to find
     * @return the first unit whose name equals {@code unitName}
     * @throws IllegalArgumentException if no units are configured or none matches
     */
    public HibernatePropertiesUnit find(String unitName) {
        if (units != null) {
            for (HibernatePropertiesUnit unit : units) {
                // Null-safe on both sides: a unit without a name simply never matches.
                if (unit != null && unitName != null && unitName.equals(unit.getUnitName())) {
                    return unit;
                }
            }
        }
        throw new IllegalArgumentException(this.getClass().getName() + ": unitName Not found. " + unitName);
    }
}

View File

@ -0,0 +1,22 @@
package com.myoa.engineering.crawl.ppomppu.processor.configuration.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Connection-pool settings bound from the {@code spring.datasource.hikari} configuration prefix.
 * Lombok generates a getter and setter per field. No defaults are declared here, so a field
 * whose key is not configured remains {@code null}.
 */
@Component
@Setter
@Getter
@ConfigurationProperties(prefix = "spring.datasource.hikari")
public class HikariProperties {
// Lower and upper bound of the pool size.
private Integer minimumIdle;
private Integer maximumPoolSize;
// Timeouts and lifetime; presumably milliseconds as HikariCP expects - TODO confirm.
private Integer idleTimeout;
private Integer validationTimeout;
private Integer connectionTimeout;
private Integer maxLifetime;
// Auto-commit flag passed to the pool configuration.
private Boolean autoCommit;
}

View File

@ -1,16 +1,17 @@
package com.myoa.engineering.crawl.ppomppu.processor.controller; package com.myoa.engineering.crawl.ppomppu.processor.controller;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.dto.APIResponse;
import com.myoa.engineering.crawl.ppomppu.processor.dto.FeedParsedResult; import com.myoa.engineering.crawl.ppomppu.processor.dto.FeedParsedResult;
import com.myoa.engineering.crawl.ppomppu.processor.service.PpomppuArticleService;
import com.myoa.engineering.crawl.ppomppu.processor.service.PpomppuFeedService; import com.myoa.engineering.crawl.ppomppu.processor.service.PpomppuFeedService;
import com.myoa.engineering.crawl.ppomppu.support.dto.APIResponse;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.util.List;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController; import org.springframework.web.bind.annotation.RestController;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
/** /**
@ -25,16 +26,21 @@ import reactor.core.publisher.Mono;
public class CrawlAPIController { public class CrawlAPIController {
private final PpomppuFeedService ppomppuRSSFeedService; private final PpomppuFeedService ppomppuRSSFeedService;
private final PpomppuArticleService ppomppuArticleService;
public CrawlAPIController(PpomppuFeedService ppomppuRSSFeedService) { public CrawlAPIController(PpomppuFeedService ppomppuRSSFeedService,
PpomppuArticleService ppomppuArticleService) {
this.ppomppuRSSFeedService = ppomppuRSSFeedService; this.ppomppuRSSFeedService = ppomppuRSSFeedService;
this.ppomppuArticleService = ppomppuArticleService;
} }
@PostMapping("/boards/{boardName}") @PostMapping("/boards/{boardName}")
public Mono<APIResponse<FeedParsedResult>> crawlBoard(@PathVariable("boardName") PpomppuBoardName boardName) { public Mono<APIResponse<FeedParsedResult>> crawlBoard(@PathVariable("boardName") PpomppuBoardName boardName) {
log.info("got request... {}", boardName); log.info("got request... {}", boardName);
FeedParsedResult result = FeedParsedResult.of(boardName); FeedParsedResult result = FeedParsedResult.of(boardName);
Flux<PpomppuArticle> articles = ppomppuRSSFeedService.getArticles(boardName); Mono<List<PpomppuArticle>> articles = ppomppuRSSFeedService.getArticles(boardName)
.doOnNext(e -> ppomppuArticleService.filterOnlyNewArticles(boardName, e))
.doOnNext(e -> ppomppuArticleService.save(boardName, e));
return articles.then(Mono.just(APIResponse.success(result.done()))); return articles.then(Mono.just(APIResponse.success(result.done())));
} }

View File

@ -1,22 +1,23 @@
package com.myoa.engineering.crawl.ppomppu.processor.domain; package com.myoa.engineering.crawl.ppomppu.processor.domain;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.time.Instant; import java.time.Instant;
import javax.persistence.Column; import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue; import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType; import javax.persistence.GenerationType;
import javax.persistence.Id; import javax.persistence.Id;
import javax.persistence.Table;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
/**
* PpomppuArticle
*
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
* @since 2021-09-08
*/
@Getter @Getter
@NoArgsConstructor @NoArgsConstructor
@Entity
@Table(name = "ppomppu_article")
public class PpomppuArticle extends Auditable { public class PpomppuArticle extends Auditable {
@Id @Id
@ -26,6 +27,10 @@ public class PpomppuArticle extends Auditable {
@Column @Column
private Long articleId; private Long articleId;
@Column
@Enumerated(EnumType.STRING)
private PpomppuBoardName boardName;
@Column @Column
private String articleUrl; private String articleUrl;
@ -42,10 +47,11 @@ public class PpomppuArticle extends Auditable {
private Instant registeredAt; private Instant registeredAt;
@Builder @Builder
public PpomppuArticle(Long id, Long articleId, String articleUrl, String title, public PpomppuArticle(Long id, Long articleId, PpomppuBoardName boardName, String articleUrl,
Integer recommended, Integer hit, Instant registeredAt) { String title, Integer recommended, Integer hit, Instant registeredAt) {
this.id = id; this.id = id;
this.articleId = articleId; this.articleId = articleId;
this.boardName = boardName;
this.articleUrl = articleUrl; this.articleUrl = articleUrl;
this.title = title; this.title = title;
this.recommended = recommended; this.recommended = recommended;
@ -53,16 +59,8 @@ public class PpomppuArticle extends Auditable {
this.registeredAt = registeredAt; this.registeredAt = registeredAt;
} }
@Override public PpomppuArticle updateBoardName(PpomppuBoardName boardName) {
public String toString() { this.boardName = boardName;
return "PpomppuArticle{" + return this;
"id=" + id +
", articleId=" + articleId +
", articleUrl='" + articleUrl + '\'' +
", title='" + title + '\'' +
", hit=" + hit +
", recommended=" + recommended +
", registeredAt=" + registeredAt +
'}';
} }
} }

View File

@ -0,0 +1,57 @@
package com.myoa.engineering.crawl.ppomppu.processor.domain;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * Per-board crawl watermark: remembers the id of the latest article that has been parsed
 * for a board and when that value was last written.
 */
@Getter
@NoArgsConstructor
@Entity
@Table(name = "ppomppu_board_feed_status")
public class PpomppuBoardFeedStatus extends Auditable {

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    @Column
    private Long latestParsedArticleId;

    @Column
    @Enumerated(EnumType.STRING)
    private PpomppuBoardName boardName;

    @Column
    private Instant updatedAt;

    @Builder
    public PpomppuBoardFeedStatus(Long id, Long latestParsedArticleId, PpomppuBoardName boardName, Instant updatedAt) {
        this.id = id;
        this.latestParsedArticleId = latestParsedArticleId;
        this.boardName = boardName;
        this.updatedAt = updatedAt;
    }

    /**
     * Creates a not-yet-persisted status for a board, stamped with the current time.
     *
     * @param boardName       board the status belongs to
     * @param latestArticleId id of the latest parsed article
     * @return a new status without an id
     */
    public static PpomppuBoardFeedStatus of(PpomppuBoardName boardName, Long latestArticleId) {
        final Instant now = Instant.now();
        return new PpomppuBoardFeedStatus(null, latestArticleId, boardName, now);
    }

    /**
     * Moves the watermark to the given article id and refreshes the update timestamp.
     *
     * @param latestArticleId id of the latest parsed article
     */
    public void updateArticleId(Long latestArticleId) {
        this.latestParsedArticleId = latestArticleId;
        this.updatedAt = Instant.now();
    }
}

View File

@ -0,0 +1,32 @@
package com.myoa.engineering.crawl.ppomppu.processor.domain;
import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * JPA entity mapped to the {@code published_history} table.
 * Exposes read access only (Lombok getters); no setters or builder are declared here.
 */
@Getter
@NoArgsConstructor
@Entity
@Table(name = "published_history")
public class PublishedHistory extends Auditable {
// Surrogate key generated by the database identity column.
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;
// Identifier of the user the record belongs to.
@Column
private Long userId;
// Board names stored as a single string; presumably a delimited list - TODO confirm format.
@Column
private String boardNameList;
// Publication timestamp.
@Column
private Instant publishedAt;
}

View File

@ -0,0 +1,32 @@
package com.myoa.engineering.crawl.ppomppu.processor.domain;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * JPA entity mapped to the {@code subscribed_board} table: one row pairs a user id with a board.
 * Exposes read access only (Lombok getters).
 */
@Getter
@NoArgsConstructor
@Entity
@Table(name = "subscribed_board")
public class SubscribedBoard extends Auditable {
// Surrogate key generated by the database identity column.
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;
// Identifier of the subscribing user.
@Column
private Long userId;
// Subscribed board, persisted by enum constant name (EnumType.STRING).
@Column
@Enumerated(EnumType.STRING)
private PpomppuBoardName boardName;
}

View File

@ -0,0 +1,29 @@
package com.myoa.engineering.crawl.ppomppu.processor.domain;
import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * JPA entity mapped to the {@code subscribed_user} table.
 * Exposes read access only (Lombok getters).
 */
@Getter
@NoArgsConstructor
@Entity
@Table(name = "subscribed_user")
public class SubscribedUser extends Auditable{
// Surrogate key generated by the database identity column.
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;
// Identifier of the user; presumably the messenger-side user id - TODO confirm.
@Column
private Long userId;
// Registration timestamp.
@Column
private Instant registeredAt;
}

View File

@ -1,6 +1,7 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto; package com.myoa.engineering.crawl.ppomppu.processor.dto;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.time.Instant; import java.time.Instant;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -47,7 +48,7 @@ public final class PpomppuArticleTransformer {
} }
public static String toArticleUrl(Element td) { public static String toArticleUrl(Element td) {
return td.getElementsByTag("a").attr("href"); return PpomppuBoardName.ofViewPageUrl(td.getElementsByTag("a").attr("href"));
} }
public static Integer toRecommended(Element td) { public static Integer toRecommended(Element td) {

View File

@ -2,6 +2,7 @@ package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client;
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory; import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory;
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory; import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.web.reactive.function.client.WebClient; import org.springframework.web.reactive.function.client.WebClient;
@ -19,12 +20,10 @@ import reactor.core.scheduler.Schedulers;
@Component @Component
public class PpomppuBoardFeedRetriever { public class PpomppuBoardFeedRetriever {
private static final String PPOMPPU_URL = "https://www.ppomppu.co.kr/";
private final WebClient webClient; private final WebClient webClient;
public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) { public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) {
this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL) this.webClient = webClientBuilder.baseUrl(PpomppuBoardName.PPOMPPU_URL)
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml()) .exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml())
.filter(WebClientFilterFactory.logRequest()) .filter(WebClientFilterFactory.logRequest())
.filter(WebClientFilterFactory.logResponse()) .filter(WebClientFilterFactory.logResponse())
@ -33,7 +32,7 @@ public class PpomppuBoardFeedRetriever {
public Mono<String> getHtml(String uri) { public Mono<String> getHtml(String uri) {
return webClient.get() return webClient.get()
.uri("/zboard/zboard.php?id=ppomppu") .uri(uri)
.exchangeToMono(e -> e.bodyToMono(String.class)) .exchangeToMono(e -> e.bodyToMono(String.class))
.publishOn(Schedulers.boundedElastic()) .publishOn(Schedulers.boundedElastic())
.onErrorResume(WebClientRequestException.class, t -> { .onErrorResume(WebClientRequestException.class, t -> {

View File

@ -0,0 +1,4 @@
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository;
/**
 * Marker interface with no members. It is referenced as {@code basePackageClasses} of
 * {@code @EnableJpaRepositories} so that repository scanning starts from this package.
 */
public interface BaseScanRepository {
}

View File

@ -0,0 +1,10 @@
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
/**
 * Spring Data JPA repository for {@link PpomppuArticle} entities keyed by their {@code Long} id.
 * Declares no query methods beyond those inherited from {@link JpaRepository}.
 */
@Repository
public interface PpomppuArticleRepository extends JpaRepository<PpomppuArticle, Long> {
}

View File

@ -0,0 +1,14 @@
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuBoardFeedStatus;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.util.Optional;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
/**
 * Spring Data JPA repository for {@link PpomppuBoardFeedStatus} entities keyed by their {@code Long} id.
 */
@Repository
public interface PpomppuBoardFeedStatusRepository extends JpaRepository<PpomppuBoardFeedStatus, Long> {
/**
 * Finds the feed status stored for a board.
 *
 * @param boardName board to look up
 * @return the status of that board, or an empty {@link Optional} when none is stored
 */
Optional<PpomppuBoardFeedStatus> findByBoardName(PpomppuBoardName boardName);
}

View File

@ -0,0 +1,59 @@
package com.myoa.engineering.crawl.ppomppu.processor.service;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuBoardFeedStatus;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository.PpomppuArticleRepository;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository.PpomppuBoardFeedStatusRepository;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
/**
 * Persists parsed articles and maintains the per-board watermark
 * (the id of the latest article that has already been parsed).
 */
@Slf4j
@Service
public class PpomppuArticleService {

    private final PpomppuArticleRepository ppomppuArticleRepository;
    private final PpomppuBoardFeedStatusRepository ppomppuBoardFeedStatusRepository;

    public PpomppuArticleService(PpomppuArticleRepository ppomppuArticleRepository,
                                 PpomppuBoardFeedStatusRepository ppomppuBoardFeedStatusRepository) {
        this.ppomppuArticleRepository = ppomppuArticleRepository;
        this.ppomppuBoardFeedStatusRepository = ppomppuBoardFeedStatusRepository;
    }

    /**
     * Returns the articles whose id is greater than the board's stored watermark.
     * A board without a stored status is treated as watermark {@code 0}; articles
     * without an id cannot be compared and are dropped.
     *
     * @param boardName board the articles were parsed from
     * @param articles  parsed articles
     * @return a new list with only the articles newer than the watermark
     */
    @Transactional(readOnly = true)
    public List<PpomppuArticle> filterOnlyNewArticles(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
        final Long latestArticleId = ppomppuBoardFeedStatusRepository.findByBoardName(boardName)
                                                                     .map(PpomppuBoardFeedStatus::getLatestParsedArticleId)
                                                                     .orElse(0L);
        return articles.stream()
                       .filter(e -> e.getArticleId() != null)
                       .filter(e -> e.getArticleId().compareTo(latestArticleId) > 0)
                       .collect(Collectors.toList());
    }

    /**
     * Stores the given articles and advances the board's watermark to their highest id.
     *
     * <p>An empty list is a no-op: previously it rewrote the watermark to {@code 0},
     * which made every article look new on the next crawl.
     *
     * @param boardName board the articles belong to
     * @param articles  articles to persist
     */
    @Transactional
    public void save(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
        if (articles.isEmpty()) {
            log.debug("no articles to save. boardName: {}", boardName);
            return;
        }

        // save PpomppuBoardFeedStatus (skipped when none of the articles carries an id)
        final Optional<Long> latestArticleId = articles.stream()
                                                       .map(PpomppuArticle::getArticleId)
                                                       .filter(id -> id != null)
                                                       .max(Long::compareTo);
        if (latestArticleId.isPresent()) {
            final Long newWatermark = latestArticleId.get();
            final PpomppuBoardFeedStatus status =
                ppomppuBoardFeedStatusRepository.findByBoardName(boardName)
                                                .map(existing -> {
                                                    existing.updateArticleId(newWatermark);
                                                    return existing;
                                                })
                                                .orElseGet(() -> PpomppuBoardFeedStatus.of(boardName, newWatermark));
            ppomppuBoardFeedStatusRepository.save(status);
        }

        // save real articles.
        ppomppuArticleRepository.saveAll(articles);
    }
}

View File

@ -4,6 +4,7 @@ import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleTransformer; import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleTransformer;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever; import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.util.List;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@ -27,12 +28,14 @@ public class PpomppuFeedService {
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever; this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
} }
public Flux<PpomppuArticle> getArticles(PpomppuBoardName boardName) { public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath()); final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
final Mono<Element> tbody = extractTbodyFromHtml(html) final Mono<Element> tbody = extractTbodyFromHtml(html)
.doOnNext(e -> log.info("pre tbody - {}", e.html())); .doOnNext(e -> log.info("pre tbody - {}", e.html()));
return extractArticlesFromTbody(tbody).map(this::convertFromElement) return extractArticlesFromTbody(tbody).map(this::convertFromElement)
.doOnNext(e -> log.info("parsed Result: {}", e)); .map(e -> e.updateBoardName(boardName))
.doOnNext(e -> log.info("parsed Result: {}", e))
.collectList();
} }
private Mono<Element> extractTbodyFromHtml(Mono<String> html) { private Mono<Element> extractTbodyFromHtml(Mono<String> html) {

View File

@ -4,4 +4,5 @@ spring:
on-profile: development on-profile: development
import: import:
- classpath:/development/webclient.yml - classpath:/development/webclient.yml
- classpath:/development/temppassword.yml
- classpath:/development/database.yml - classpath:/development/database.yml

View File

@ -11,4 +11,4 @@ server:
port: 20081 port: 20081
error: error:
whitelabel: whitelabel:
enabled: false enabled: false

View File

@ -33,7 +33,6 @@ public class MessageDispatcher extends TelegramLongPollingBot {
Message message = update.getMessage(); Message message = update.getMessage();
MessageHandler handler = getMessageHandler(message); MessageHandler handler = getMessageHandler(message);
log.info(message.getText());
handler.handle(message); handler.handle(message);
} }

View File

@ -0,0 +1,21 @@
package com.myoa.engineering.crawl.ppomppu.receiver.handler.message;
import com.myoa.engineering.crawl.ppomppu.support.util.ObjectUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import org.telegram.telegrambots.meta.api.objects.Message;
/**
 * Message handler that absorbs updates carrying no {@link Message}.
 *
 * <p>Despite its name, this handler does not reply with anything: it claims
 * only {@code null} messages and handles them by doing nothing.
 */
@Slf4j
@Component
public class HelloWorldMessageHandler implements MessageHandler {

    /**
     * Reports whether this handler should take the given message.
     *
     * @param message the incoming message, possibly {@code null}
     * @return {@code true} only when {@code message} is {@code null}
     */
    @Override
    public boolean isApplicable(Message message) {
        final boolean messageAbsent = ObjectUtil.isEmpty(message);
        return messageAbsent;
    }

    /**
     * Handles an empty event by ignoring it.
     *
     * @param message the incoming message; not used
     */
    @Override
    public void handle(Message message) {
        // Intentionally a no-op: empty event messages are skipped.
    }
}

View File

@ -1,17 +1,18 @@
package com.myoa.engineering.crawl.ppomppu.receiver.handler.message; package com.myoa.engineering.crawl.ppomppu.receiver.handler.message;
import com.myoa.engineering.crawl.ppomppu.support.util.ObjectUtil;
import org.telegram.telegrambots.meta.api.objects.Message; import org.telegram.telegrambots.meta.api.objects.Message;
/** /**
* TextMessageHandler * TextMessageHandler
*
* @author Shin Woo-jin (woo-jin.shin@linecorp.com) * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
* @since 2021-08-21 * @since 2021-08-21
*
*/ */
public interface TextMessageHandler extends MessageHandler { public interface TextMessageHandler extends MessageHandler {
@Override @Override
default boolean isApplicable(Message message) { default boolean isApplicable(Message message) {
return message.isUserMessage() && message.hasText(); return ObjectUtil.isNotEmpty(message) && message.isUserMessage() && message.hasText();
} }
} }

View File

@ -3,5 +3,5 @@ spring:
activate: activate:
on-profile: development on-profile: development
import: import:
- classpath:/webclient-development.yml - classpath:/development/webclient.yml
- classpath:/temppassword.yml - classpath:/development/temppassword.yml

View File

@ -12,34 +12,42 @@ import lombok.NoArgsConstructor;
@Getter @Getter
@NoArgsConstructor @NoArgsConstructor
public enum PpomppuBoardName { public enum PpomppuBoardName {
PPOMPPU_DOMESTIC_ETC("zboard/zboard.php?id=ppomppu&category=1"), PPOMPPU_DOMESTIC_ETC("zboard/zboard.php?id=ppomppu&category=1", true),
PPOMPPU_DOMESTIC_COMPUTER("zboard/zboard.php?id=ppomppu&category=4"), PPOMPPU_DOMESTIC_COMPUTER("zboard/zboard.php?id=ppomppu&category=4", true),
PPOMPPU_DOMESTIC_DIGITAL("zboard/zboard.php?id=ppomppu&category=5"), PPOMPPU_DOMESTIC_DIGITAL("zboard/zboard.php?id=ppomppu&category=5", true),
PPOMPPU_DOMESTIC_FOOD("zboard/zboard.php?id=ppomppu&category=6"), PPOMPPU_DOMESTIC_FOOD("zboard/zboard.php?id=ppomppu&category=6", true),
PPOMPPU_DOMESTIC_BOOK("zboard/zboard.php?id=ppomppu&category=8"), PPOMPPU_DOMESTIC_BOOK("zboard/zboard.php?id=ppomppu&category=8", true),
PPOMPPU_DOMESTIC_APPLIANCES("zboard/zboard.php?id=ppomppu&category=9"), PPOMPPU_DOMESTIC_APPLIANCES("zboard/zboard.php?id=ppomppu&category=9", true),
PPOMPPU_DOMESTIC_PARENTING("zboard/zboard.php?id=ppomppu&category=10"), PPOMPPU_DOMESTIC_PARENTING("zboard/zboard.php?id=ppomppu&category=10", true),
PPOMPPU_DOMESTIC_GIFTCARD("zboard/zboard.php?id=ppomppu&category=11"), PPOMPPU_DOMESTIC_GIFTCARD("zboard/zboard.php?id=ppomppu&category=11", true),
PPOMPPU_DOMESTIC_CLOTHES("zboard/zboard.php?id=ppomppu&category=12"), PPOMPPU_DOMESTIC_CLOTHES("zboard/zboard.php?id=ppomppu&category=12", true),
PPOMPPU_DOMESTIC_COSMETIC("zboard/zboard.php?id=ppomppu&category=13"), PPOMPPU_DOMESTIC_COSMETIC("zboard/zboard.php?id=ppomppu&category=13", true),
PPOMPPU_DOMESTIC_OUTDOOR("zboard/zboard.php?id=ppomppu&category=15"), PPOMPPU_DOMESTIC_OUTDOOR("zboard/zboard.php?id=ppomppu&category=15", true),
PPOMPPU_OVERSEA_ETC("zboard/zboard.php?id=ppomppu4&category=1"), PPOMPPU_OVERSEA_ETC("zboard/zboard.php?id=ppomppu4&category=1", true),
PPOMPPU_OVERSEA_APPLIANCES("zboard/zboard.php?id=ppomppu4&category=7"), PPOMPPU_OVERSEA_APPLIANCES("zboard/zboard.php?id=ppomppu4&category=7", true),
PPOMPPU_OVERSEA_TVAV("zboard/zboard.php?id=ppomppu4&category=8"), PPOMPPU_OVERSEA_TVAV("zboard/zboard.php?id=ppomppu4&category=8", true),
PPOMPPU_OVERSEA_COMPUTER("zboard/zboard.php?id=ppomppu4&category=3"), PPOMPPU_OVERSEA_COMPUTER("zboard/zboard.php?id=ppomppu4&category=3", true),
PPOMPPU_OVERSEA_DIGITAL("zboard/zboard.php?id=ppomppu4&category=4"), PPOMPPU_OVERSEA_DIGITAL("zboard/zboard.php?id=ppomppu4&category=4", true),
PPOMPPU_OVERSEA_MOBILEACCESSORY("zboard/zboard.php?id=ppomppu4&category=9"), PPOMPPU_OVERSEA_MOBILEACCESSORY("zboard/zboard.php?id=ppomppu4&category=9", true),
PPOMPPU_OVERSEA_CLOTHES("zboard/zboard.php?id=ppomppu4&category=5"), PPOMPPU_OVERSEA_CLOTHES("zboard/zboard.php?id=ppomppu4&category=5", true),
PPOMPPU_OVERSEA_WATCH("zboard/zboard.php?id=ppomppu4&category=2"), PPOMPPU_OVERSEA_WATCH("zboard/zboard.php?id=ppomppu4&category=2", true),
PPOMPPU_OVERSEA_SHOES("zboard/zboard.php?id=ppomppu4&category=11"), PPOMPPU_OVERSEA_SHOES("zboard/zboard.php?id=ppomppu4&category=11", true),
PPOMPPU_OVERSEA_FOOD("zboard/zboard.php?id=ppomppu4&category=10"), PPOMPPU_OVERSEA_FOOD("zboard/zboard.php?id=ppomppu4&category=10", true),
PPOMPPU_OVERSEA_PARENTING("zboard/zboard.php?id=ppomppu4&category=6"), PPOMPPU_OVERSEA_PARENTING("zboard/zboard.php?id=ppomppu4&category=6", true),
; ;
private String resourcePath; private String resourcePath;
private boolean crawlWithDefaultTimer;
PpomppuBoardName(String boardPath) { PpomppuBoardName(String boardPath, boolean crawlWithDefaultTimer) {
this.resourcePath = boardPath; this.resourcePath = boardPath;
this.crawlWithDefaultTimer = crawlWithDefaultTimer;
}
public static final String PPOMPPU_URL = "https://www.ppomppu.co.kr/";
public static String ofViewPageUrl(String articleUrl) {
return PPOMPPU_URL + "zboard/" + articleUrl;
} }
} }

View File

@ -4,4 +4,12 @@ public final class ObjectUtil {
private ObjectUtil() {} private ObjectUtil() {}
/**
 * Tells whether the given reference is absent.
 *
 * <p>Only a {@code null} check is performed; empty strings, collections and
 * arrays are all reported as not empty.
 *
 * @param o the reference to inspect, may be {@code null}
 * @return {@code true} if {@code o} is {@code null}, {@code false} otherwise
 */
public static boolean isEmpty(Object o) {
    return null == o;
}

/**
 * Tells whether the given reference is present; the exact negation of
 * {@link #isEmpty(Object)}.
 *
 * @param o the reference to inspect, may be {@code null}
 * @return {@code true} if {@code o} is not {@code null}, {@code false} otherwise
 */
public static boolean isNotEmpty(Object o) {
    return o != null;
}
} }

View File

@ -0,0 +1,41 @@
spring:
jpa:
open-in-view: false
hibernate:
ddl-auto: create
datasource:
# driver-class-name: com.mysql.cj.jdbc.Driver
driver-class-name: org.h2.Driver
url: jdbc:h2:mem:ppomppu-local;DB_CLOSE_DELAY=-1
hikari:
minimum-idle: 5
maximum-pool-size: 10
idle-timeout: 600000
validation-timeout: 5000
connection-timeout: 5000
max-lifetime: 1800000
auto-commit: false
h2:
console:
enabled: true
path: /h2
port: 20082
datasource:
init: true
units:
- unit-name: ppomppu
schema-name: ppomppu-local
db-connection-url: jdbc:h2:mem:ppomppu-local
simple-connection-url: true
hibernate:
units:
- unit-name: ppomppu
dialect: org.hibernate.dialect.H2Dialect
format-sql: true
show-sql: true
hbm2ddl-auto: create
disable-auto-commit: true