Compare commits

..

8 Commits

Author SHA1 Message Date
woozu-shin 378e6ef68e [NO-ISSUE] Change references 2024-05-09 09:32:39 +09:00
woozu-shin 08737664f4 Merge branch 'feature/NO-ISSUE-v2' into develop 2024-05-09 08:50:05 +09:00
nthfuncx 4738b930df Merge pull request '[PPN-13] Deal with new page board structure' (#14) from feature/PPN-13 into develop
Reviewed-on: OutworldDestroyer/PpomppuNotifier#14
2022-01-02 12:50:21 +09:00
woo-jin.shin 6d315e2a9f [PPN-13] Deal with new page board structure 2022-01-02 12:48:10 +09:00
woo-jin.shin 8eb431a812 Prettify section message 2021-12-04 01:29:32 +09:00
woo-jin.shin 520a651a70 Apply accessory image 2021-12-04 01:07:42 +09:00
woo-jin.shin 7b230fdb74 [NO-ISSUE] Fix bug 2021-12-01 01:16:15 +09:00
woo-jin.shin b204b70b79 [NO-ISSUE] Fix bug 2021-12-01 01:16:01 +09:00
42 changed files with 752 additions and 395 deletions

View File

@ -1,14 +1,13 @@
plugins { plugins {
id 'java' id 'java'
id 'idea' id 'idea'
id 'org.springframework.boot' version '3.2.5' id 'org.springframework.boot' version '2.5.4'
id 'io.spring.dependency-management' version '1.1.4' id 'io.spring.dependency-management' version '1.0.11.RELEASE'
} }
group = 'com.myoa.engineering.crawl.ppomppu' group = 'com.myoa.engineering.crawl.shopping'
version = '1.0.1' version = '1.1.1'
sourceCompatibility = JavaVersion.VERSION_21 sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_21
configurations { configurations {
compileOnly { compileOnly {
@ -22,7 +21,7 @@ repositories {
allprojects { allprojects {
group = 'com.myoa.engineering.crawl.shopping' group = 'com.myoa.engineering.crawl.shopping'
version = '2.0.0' version = '1.1.1'
apply plugin: 'java' apply plugin: 'java'
apply plugin: 'idea' apply plugin: 'idea'
@ -37,7 +36,7 @@ allprojects {
} }
ext { ext {
set('springCloudVersion', "2023.0.1") set('springCloudVersion', "2020.0.4")
} }
dependencyManagement { dependencyManagement {

3
copy.bat Normal file
View File

@ -0,0 +1,3 @@
rem Collect the jar files produced under each module's build\libs directory into the current directory.
rem /y makes xcopy overwrite existing destination files without prompting.
xcopy /y .\processor\build\libs\*.jar .\
xcopy /y .\receiver\build\libs\*.jar .\
xcopy /y .\sender\build\libs\*.jar .\

View File

@ -0,0 +1,70 @@
package com.myoa.engineering.crawl.shopping.controller;
import java.util.List;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import com.myoa.engineering.crawl.shopping.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.processor.dto.FeedParsedResult;
import com.myoa.engineering.crawl.shopping.processor.service.MessageSenderService;
import com.myoa.engineering.crawl.shopping.processor.service.PpomppuArticleService;
import com.myoa.engineering.crawl.shopping.processor.service.PpomppuFeedService;
import com.myoa.engineering.crawl.shopping.support.dto.APIResponse;
import com.myoa.engineering.crawl.shopping.support.dto.code.PpomppuBoardName;
import lombok.extern.slf4j.Slf4j;
import reactor.core.publisher.Mono;
/**
 * CrawlAPIController
 *
 * <p>REST endpoints under {@code /api/v1/crawl} that fetch the articles of one board through
 * {@link PpomppuFeedService} and hand them to {@link MessageSenderService}.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-09-05
 */
@Slf4j
@RestController
@RequestMapping("/api/v1/crawl")
public class CrawlAPIController {

    private final PpomppuFeedService ppomppuRSSFeedService;
    private final PpomppuArticleService ppomppuArticleService;
    private final MessageSenderService messageSenderService;

    public CrawlAPIController(PpomppuFeedService ppomppuRSSFeedService,
                              PpomppuArticleService ppomppuArticleService,
                              MessageSenderService messageSenderService) {
        this.ppomppuRSSFeedService = ppomppuRSSFeedService;
        this.ppomppuArticleService = ppomppuArticleService;
        this.messageSenderService = messageSenderService;
    }

    /**
     * Fetches the articles of a board, passes them through
     * {@code filterOnlyNewArticles} and {@code save}, and sends the saved articles to Slack
     * as a block message unless the saved result is empty.
     *
     * @param boardName board to crawl, bound from the {@code boardName} path variable
     * @return a success response wrapping {@code result.done()}, emitted after the pipeline
     *         has completed; an error from any pipeline step is propagated instead
     */
    @PostMapping("/boards/{boardName}")
    public Mono<APIResponse<FeedParsedResult>> crawlBoard(@PathVariable("boardName") PpomppuBoardName boardName) {
        log.info("got request... {}", boardName);
        FeedParsedResult result = FeedParsedResult.of(boardName);

        Mono<String> publishedMessages =
                ppomppuRSSFeedService.getArticles(boardName)
                                     .map(e -> ppomppuArticleService.filterOnlyNewArticles(boardName, e))
                                     .map(e -> ppomppuArticleService.save(boardName, e))
                                     .filter(e -> !e.isEmpty())
                                     .flatMap(e -> messageSenderService.sendBlockMessageToSlack(boardName, e));

        // Mono.just(...) would call result.done() right here, while the pipeline above is only
        // assembled and nothing has been fetched yet. The supplier postpones the call until
        // the pipeline has actually completed.
        Mono<APIResponse<FeedParsedResult>> response =
                Mono.fromSupplier(() -> APIResponse.success(result.done()));
        return publishedMessages.then(response);
    }

    /**
     * Fetches the articles of a board and sends all of them to Slack, skipping the
     * new-article filter and the save step used by {@link #crawlBoard(PpomppuBoardName)}.
     *
     * @param boardName board to crawl, bound from the {@code boardName} path variable
     * @return a success response wrapping the value emitted by the message sender
     */
    @PostMapping("/exploit/boards/{boardName}")
    public Mono<APIResponse<String>> crawlBoardDryRun(
            @PathVariable("boardName") PpomppuBoardName boardName) {
        log.info("got request... {}", boardName);

        Mono<String> publishedMessages =
                ppomppuRSSFeedService.getArticles(boardName)
                                     .flatMap(e -> messageSenderService.sendBlockMessageToSlack(boardName, e));

        return publishedMessages.map(APIResponse::success);
    }
}

View File

@ -0,0 +1,78 @@
package com.myoa.engineering.crawl.shopping.dto;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.myoa.engineering.crawl.shopping.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.dto.code.PpomppuBoardName;
/**
 * PpomppuArticleParser
 *
 * <p>Static helpers that read the cells of one article row and assemble them into a
 * {@link PpomppuArticle}. Cells at positions 0, 2, 3, 5, 6 and 7 of the row are used.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
public final class PpomppuArticleParser {

    private PpomppuArticleParser() {}

    /**
     * Builds an article from the cells of a single row.
     *
     * @param articleElement cells of the row, addressed by position
     * @return the article produced by {@code PpomppuArticleParseDTO#convert()}
     */
    public static PpomppuArticle toArticle(Elements articleElement) {
        final String id = parseArticleId(articleElement.get(0));
        final Element titleCell = articleElement.get(2);
        final String parsedTitle = parseTitle(titleCell);
        final String url = parseArticleUrl(titleCell);
        final String thumbnail = parseThumbnailUrl(articleElement.get(3));
        final Integer votes = parseRecommended(articleElement.get(6));
        final String hits = parseHit(articleElement.get(7));
        final String registered = parseRegisteredAt(articleElement.get(5));

        final PpomppuArticleParseDTO parsed = PpomppuArticleParseDTO.builder()
                                                                    .articleId(id)
                                                                    .title(parsedTitle)
                                                                    .articleUrl(url)
                                                                    .thumbnailUrl(thumbnail)
                                                                    .recommended(votes)
                                                                    .hit(hits)
                                                                    .registeredAt(registered)
                                                                    .build();
        return parsed.convert();
    }

    /** Returns the trimmed text of the cell. */
    public static String parseArticleId(Element td) {
        final String raw = td.text();
        return raw.trim();
    }

    /** Returns the combined text of the {@code a} elements found in the cell. */
    public static String parseTitle(Element td) {
        final Elements anchors = td.getElementsByTag("a");
        return anchors.text();
    }

    /** Resolves the {@code href} of the cell's {@code a} element via {@code PpomppuBoardName.ofViewPageUrl}. */
    public static String parseArticleUrl(Element td) {
        final String href = td.getElementsByTag("a").attr("href");
        return PpomppuBoardName.ofViewPageUrl(href);
    }

    /**
     * Returns the {@code src} of the first {@code img} in the cell, prefixed with {@code https:}.
     * Fails when the cell holds no {@code img} element.
     */
    public static String parseThumbnailUrl(Element td) {
        final Element firstImage = td.getElementsByTag("img").get(0);
        return "https:" + firstImage.attr("src");
    }

    /**
     * Computes up-votes minus down-votes from text shaped like {@code "<up> - <down>"}.
     *
     * @return {@code 0} for an empty cell, otherwise the difference of the two numbers
     */
    public static Integer parseRecommended(Element td) {
        final String voteString = td.text();
        if (voteString.isEmpty()) {
            return 0;
        }
        final String[] votes = voteString.split(" - ");
        return Integer.parseInt(votes[0]) - Integer.parseInt(votes[1]);
    }

    /** Returns the cell text unchanged; conversion to a number happens later. */
    public static String parseHit(Element td) {
        return td.text();
    }

    /** Returns the {@code title} attribute of the cell. */
    public static String parseRegisteredAt(Element td) {
        return td.attr("title");
    }
}

View File

@ -0,0 +1,38 @@
package com.myoa.engineering.crawl.shopping.receiver.scheduler;
import java.util.Arrays;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import com.myoa.engineering.crawl.shopping.receiver.service.ProcessorAPIService;
import com.myoa.engineering.crawl.shopping.support.dto.code.PpomppuBoardName;
import lombok.extern.slf4j.Slf4j;
/**
* ParseEventEmitter
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
* @since 2021-09-05
*
*/
@Slf4j
@Component
@EnableScheduling
public class ParseEventEmitter {
private final ProcessorAPIService processorAPIService;
public ParseEventEmitter(ProcessorAPIService processorAPIService) {
this.processorAPIService = processorAPIService;
}
@Scheduled(fixedRate = 600 * 1000L)
public void emitBoards() {
log.info("[emitDomesticBoard] trigger fired!");
Arrays.stream(PpomppuBoardName.values())
.filter(PpomppuBoardName::isCrawlWithDefaultTimer)
.forEach(boardName -> processorAPIService.emitParseEvent(boardName).block());
}
}

View File

@ -0,0 +1,6 @@
spring:
config:
activate:
on-profile: production
import:
- "configserver:http://ppn-config-server:20080"

View File

@ -0,0 +1,25 @@
spring:
application:
name: ppn-receiver
main:
allow-bean-definition-overriding: true
profiles:
active: ${SPRING_ACTIVE_PROFILE:local}
group:
local: "local,webclient-local"
development: "development,webclient-development"
production: "production,webclient-production"
freemarker:
enabled: false
server:
port: 20080
error:
whitelabel:
enabled: false
management:
endpoints:
web:
exposure:
include: refresh,health

View File

@ -0,0 +1,62 @@
package com.myoa.engineering.crawl.shopping.controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import com.myoa.engineering.crawl.shopping.sender.dto.SlackBaseMessageBlock;
import com.myoa.engineering.crawl.shopping.sender.dto.SlackMessageDTO;
import com.myoa.engineering.crawl.shopping.sender.infrastructure.client.MongeShoppingBotSlackMessageSender;
import com.myoa.engineering.crawl.shopping.support.dto.APIResponse;
import com.myoa.engineering.crawl.shopping.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.shopping.support.dto.SimpleMessageDTO;
import com.myoa.engineering.crawl.shopping.support.util.ObjectMapperFactory;
import lombok.extern.slf4j.Slf4j;
import reactor.core.publisher.Mono;
/**
 * MessageSenderAPIController
 *
 * <p>REST endpoints under {@code /api/v1} that forward simple and block messages to Slack
 * through {@link MongeShoppingBotSlackMessageSender}.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-21
 */
@Slf4j
@RestController
@RequestMapping("/api/v1")
public class MessageSenderAPIController {

    private final MongeShoppingBotSlackMessageSender sender;

    public MessageSenderAPIController(MongeShoppingBotSlackMessageSender sender) {
        this.sender = sender;
    }

    /**
     * Sends the body of the given message to Slack.
     *
     * @param dto message whose body is sent
     * @return a success response echoing {@code dto} once the send has completed
     */
    @PostMapping("/messages/sendSimpleMessage/messengers/slack")
    public Mono<APIResponse<SimpleMessageDTO>> sendSimpleMessageToSlack(@RequestBody SimpleMessageDTO dto) {
        return sender.sendMessage(sender.ofMessage(dto.getBody()))
                     .then(Mono.just(APIResponse.success(dto)));
    }

    /**
     * Sends the title and blocks of the given message to Slack as section blocks.
     *
     * @param dto message carrying the title and the blocks to send
     * @return a failure response with {@code "empty blocks"} when there are no blocks,
     *         otherwise a success response echoing {@code dto} once the send has completed
     */
    @PostMapping("/messages/sendBlockMessage/messengers/slack")
    public Mono<APIResponse<BlockMessageDTO>> sendBlockMessageToSlack(@RequestBody BlockMessageDTO dto) {
        // A body without blocks may leave getBlocks() null; answer it like an empty list
        // instead of failing with a NullPointerException.
        if (dto.getBlocks() == null || dto.getBlocks().isEmpty()) {
            return Mono.just(APIResponse.fail(dto, "empty blocks"));
        }
        return sender.sendMessage(buildSlackMessageDTO(dto))
                     .then(Mono.just(APIResponse.success(dto)));
    }

    /** Builds the Slack payload: the title section, one section per block, then a divider. */
    private SlackMessageDTO buildSlackMessageDTO(BlockMessageDTO dto) {
        SlackMessageDTO slackMessageDTO = sender.ofBlockMessageBased();
        slackMessageDTO.addSectionBlock(dto.getTitle());
        dto.getBlocks().forEach(slackMessageDTO::addSectionBlock);
        slackMessageDTO.addBlock(SlackBaseMessageBlock.ofDivider());
        return slackMessageDTO;
    }
}

View File

@ -0,0 +1,25 @@
spring:
application:
name: ppn-sender
main:
allow-bean-definition-overriding: true
profiles:
active: ${SPRING_ACTIVE_PROFILE:local}
group:
local: "local,slackapi-local,webclient-local"
development: "development,slackapi-development,webclient-development"
production: "production,slackapi-production,webclient-production"
freemarker:
enabled: false
server:
port: 20080
error:
whitelabel:
enabled: false
management:
endpoints:
web:
exposure:
include: refresh,health

View File

@ -1,7 +1,7 @@
dependencies { dependencies {
developmentOnly 'org.springframework.boot:spring-boot-devtools' developmentOnly 'org.springframework.boot:spring-boot-devtools'
runtimeOnly 'com.h2database:h2' runtimeOnly 'com.h2database:h2'
runtimeOnly 'com.mysql:mysql-connector-j' runtimeOnly 'com.mysql:mysql-connector-j:8.4.0'
compileOnly 'org.projectlombok:lombok' compileOnly 'org.projectlombok:lombok'
implementation project(':support') implementation project(':support')
@ -17,7 +17,6 @@ dependencies {
implementation 'org.springframework.boot:spring-boot-configuration-processor' implementation 'org.springframework.boot:spring-boot-configuration-processor'
implementation 'org.springframework.cloud:spring-cloud-starter-config' implementation 'org.springframework.cloud:spring-cloud-starter-config'
implementation 'org.springframework.boot:spring-boot-starter-actuator' implementation 'org.springframework.boot:spring-boot-starter-actuator'
implementation 'com.rometools:rome:2.1.0'
implementation 'org.jsoup:jsoup:1.17.2' implementation 'org.jsoup:jsoup:1.17.2'
implementation 'com.h2database:h2:2.2.224' implementation 'com.h2database:h2:2.2.224'
implementation "org.springframework.cloud:spring-cloud-starter-openfeign" implementation "org.springframework.cloud:spring-cloud-starter-openfeign"

View File

@ -7,13 +7,12 @@ import com.myoa.engineering.crawl.shopping.domain.entity.BaseScanDomain;
import com.myoa.engineering.crawl.shopping.infra.repository.BaseScanRepository; import com.myoa.engineering.crawl.shopping.infra.repository.BaseScanRepository;
import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import jakarta.persistence.EntityManagerFactory;
import lombok.NonNull; import lombok.NonNull;
import org.hibernate.boot.model.naming.CamelCaseToUnderscoresNamingStrategy;
import org.hibernate.boot.model.naming.ImplicitNamingStrategyJpaCompliantImpl;
import org.hibernate.cfg.AvailableSettings; import org.hibernate.cfg.AvailableSettings;
import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder; import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder;
import org.springframework.boot.orm.jpa.hibernate.SpringImplicitNamingStrategy;
import org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy;
import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Configuration;
import org.springframework.data.jpa.repository.config.EnableJpaAuditing; import org.springframework.data.jpa.repository.config.EnableJpaAuditing;
@ -22,6 +21,7 @@ import org.springframework.orm.jpa.JpaTransactionManager;
import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean; import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean;
import org.springframework.transaction.PlatformTransactionManager; import org.springframework.transaction.PlatformTransactionManager;
import javax.persistence.EntityManagerFactory;
import javax.sql.DataSource; import javax.sql.DataSource;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
@ -96,8 +96,8 @@ public class ShoppingCrawlerDatasourceConfiguration {
properties.put(AvailableSettings.SHOW_SQL, hibernateProperties.getShowSql()); properties.put(AvailableSettings.SHOW_SQL, hibernateProperties.getShowSql());
properties.put(AvailableSettings.HBM2DDL_AUTO, hibernateProperties.getHbm2ddlAuto()); properties.put(AvailableSettings.HBM2DDL_AUTO, hibernateProperties.getHbm2ddlAuto());
properties.put(AvailableSettings.CONNECTION_PROVIDER_DISABLES_AUTOCOMMIT, hibernateProperties.getDisableAutoCommit()); properties.put(AvailableSettings.CONNECTION_PROVIDER_DISABLES_AUTOCOMMIT, hibernateProperties.getDisableAutoCommit());
properties.put(AvailableSettings.IMPLICIT_NAMING_STRATEGY, ImplicitNamingStrategyJpaCompliantImpl.class.getName()); properties.put(AvailableSettings.IMPLICIT_NAMING_STRATEGY, SpringImplicitNamingStrategy.class.getName());
properties.put(AvailableSettings.PHYSICAL_NAMING_STRATEGY, CamelCaseToUnderscoresNamingStrategy.class.getName()); properties.put(AvailableSettings.PHYSICAL_NAMING_STRATEGY, SpringPhysicalNamingStrategy.class.getName());
properties.put(AvailableSettings.GENERATE_STATISTICS, "false"); properties.put(AvailableSettings.GENERATE_STATISTICS, "false");
// properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS, "true"); // properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS, "true");
// properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS_SKIP_COLUMN_DEFINITIONS, "true"); // properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS_SKIP_COLUMN_DEFINITIONS, "true");

View File

@ -3,10 +3,11 @@ package com.myoa.engineering.crawl.shopping.domain.entity;
import lombok.Getter; import lombok.Getter;
import org.springframework.data.annotation.CreatedDate; import org.springframework.data.annotation.CreatedDate;
import org.springframework.data.annotation.LastModifiedDate; import org.springframework.data.annotation.LastModifiedDate;
import jakarta.persistence.*;
import org.springframework.data.jpa.domain.support.AuditingEntityListener; import org.springframework.data.jpa.domain.support.AuditingEntityListener;
import javax.persistence.Column;
import javax.persistence.EntityListeners;
import javax.persistence.MappedSuperclass;
import java.io.Serializable; import java.io.Serializable;
import java.time.Instant; import java.time.Instant;

View File

@ -2,15 +2,13 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v1;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable; import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName; import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import jakarta.persistence.*;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.ToString;
import java.time.ZonedDateTime; import javax.persistence.*;
import java.time.Instant;
@ToString
@Getter @Getter
@NoArgsConstructor @NoArgsConstructor
@Entity @Entity
@ -31,6 +29,9 @@ public class PpomppuArticle extends Auditable {
@Column @Column
private String articleUrl; private String articleUrl;
@Column
private String thumbnailUrl;
@Column @Column
private String title; private String title;
@ -41,19 +42,25 @@ public class PpomppuArticle extends Auditable {
private Integer recommended; private Integer recommended;
@Column @Column
private ZonedDateTime registeredAt; private Instant registeredAt;
@Builder @Builder
public PpomppuArticle(Long id, Long articleId, PpomppuBoardName boardName, String articleUrl, public PpomppuArticle(Long id, Long articleId, PpomppuBoardName boardName, String articleUrl,
String title, Integer recommended, Integer hit, ZonedDateTime registeredAt) { String thumbnailUrl, String title, Integer recommended, Integer hit,
Instant registeredAt) {
this.id = id; this.id = id;
this.articleId = articleId; this.articleId = articleId;
this.boardName = boardName; this.boardName = boardName;
this.articleUrl = articleUrl; this.articleUrl = articleUrl;
this.thumbnailUrl = thumbnailUrl;
this.title = title; this.title = title;
this.recommended = recommended; this.recommended = recommended;
this.hit = hit; this.hit = hit;
this.registeredAt = registeredAt; this.registeredAt = registeredAt;
} }
public PpomppuArticle updateBoardName(PpomppuBoardName boardName) {
this.boardName = boardName;
return this;
}
} }

View File

@ -2,11 +2,11 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v1;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable; import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName; import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import jakarta.persistence.*;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import javax.persistence.*;
import java.time.Instant; import java.time.Instant;
@Getter @Getter

View File

@ -4,7 +4,7 @@ import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import jakarta.persistence.*; import javax.persistence.*;
import java.time.Instant; import java.time.Instant;
@Getter @Getter

View File

@ -5,7 +5,7 @@ import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import jakarta.persistence.*; import javax.persistence.*;
@Getter @Getter
@NoArgsConstructor @NoArgsConstructor

View File

@ -4,7 +4,7 @@ import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import jakarta.persistence.*; import javax.persistence.*;
import java.time.Instant; import java.time.Instant;
@Getter @Getter

View File

@ -1,7 +1,7 @@
package com.myoa.engineering.crawl.shopping.domain.entity.v2; package com.myoa.engineering.crawl.shopping.domain.entity.v2;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable; import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import jakarta.persistence.*; import javax.persistence.*;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;

View File

@ -2,7 +2,7 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v2;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable; import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget; import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
import jakarta.persistence.*; import javax.persistence.*;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;

View File

@ -2,7 +2,7 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v2;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable; import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget; import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
import jakarta.persistence.*; import javax.persistence.*;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;

View File

@ -1,10 +0,0 @@
package com.myoa.engineering.crawl.shopping.dto;
/**
* PpomppuArticle
*
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
* @since 2021-09-08
*/
public class PpomppuArticleDTO {
}

View File

@ -0,0 +1,65 @@
package com.myoa.engineering.crawl.shopping.dto;
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.util.DateUtil;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import java.time.Instant;
/**
 * PpomppuArticleParseDTO
 *
 * <p>Holds the raw values read for one article, mostly as strings, and converts them into a
 * {@link PpomppuArticle}.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
@Getter
@NoArgsConstructor
public class PpomppuArticleParseDTO {

    private String id;
    private String articleId;
    private String boardName;
    private String articleUrl;
    private String thumbnailUrl;
    private String title;
    private String hit;
    private Integer recommended;
    private String registeredAt;

    @Builder
    public PpomppuArticleParseDTO(String id, String articleId, String boardName, String articleUrl,
                                  String thumbnailUrl, String title, String hit, Integer recommended,
                                  String registeredAt) {
        this.id = id;
        this.articleId = articleId;
        this.boardName = boardName;
        this.articleUrl = articleUrl;
        this.thumbnailUrl = thumbnailUrl;
        this.title = title;
        this.hit = hit;
        this.recommended = recommended;
        this.registeredAt = registeredAt;
    }

    /**
     * Reports whether this DTO cannot be converted.
     *
     * @return {@code true} when {@code articleId} or {@code hit} is null or empty
     */
    public boolean isInValidated() {
        return isNullOrEmpty(articleId) || isNullOrEmpty(hit);
    }

    /**
     * Converts the raw values into a {@link PpomppuArticle}: {@code articleId} and {@code hit}
     * are parsed as numbers and {@code registeredAt} is parsed with
     * {@code DateUtil.DATE_TIME_FORMATTER}.
     *
     * @return the converted article
     * @throws IllegalArgumentException when {@link #isInValidated()} is {@code true}
     */
    public PpomppuArticle convert() {
        if (isInValidated()) {
            throw new IllegalArgumentException("PpomppuArticleParseDTO was invalidated");
        }

        final long parsedArticleId = Long.parseLong(articleId);
        final int parsedHit = Integer.parseInt(hit);
        final Instant parsedRegisteredAt = DateUtil.DATE_TIME_FORMATTER.parse(registeredAt, Instant::from);

        return PpomppuArticle.builder()
                             .articleId(parsedArticleId)
                             .title(title)
                             .articleUrl(articleUrl)
                             .thumbnailUrl(thumbnailUrl)
                             .recommended(recommended)
                             .hit(parsedHit)
                             .registeredAt(parsedRegisteredAt)
                             .build();
    }

    private static boolean isNullOrEmpty(String value) {
        return value == null || value.isEmpty();
    }
}

View File

@ -1,12 +1,14 @@
package com.myoa.engineering.crawl.shopping.dto; package com.myoa.engineering.crawl.shopping.dto;
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle; import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.shopping.support.dto.SimpleMessageDTO; import com.myoa.engineering.crawl.shopping.support.dto.SimpleMessageDTO;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import com.myoa.engineering.crawl.shopping.support.util.DateUtil;
import java.time.Instant; import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.List; import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** /**
@ -20,34 +22,50 @@ public final class PpomppuArticleTransformer {
private PpomppuArticleTransformer() { private PpomppuArticleTransformer() {
} }
private static final String MESSAGE_FORMAT_V1 = "%s)) `%s` <%s:LINK>"; private static final String MESSAGE_FORMAT_V1 = "%s)) <%s|LINK> `%s` ";
private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s";
private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_";
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = article ->
.withZone(ZoneId.of("Asia/Seoul"));
/*
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = entity ->
SimpleMessageDTO.builder() SimpleMessageDTO.builder()
.requestedAt(Instant.now()) .requestedAt(Instant.now())
.publishedAt(entity.getRegisteredAt()) .publishedAt(article.getRegisteredAt())
.title(String.format(MESSAGE_FORMAT_V1, entity.getBoardName().getMenuName(), entity.getTitle())) .title(String.format(MESSAGE_FORMAT_V1,
.body(entity.getArticleUrl()) article.getBoardName().getMenuName(), article.getArticleUrl(),
article.getTitle()))
.body(article.getArticleUrl())
.build(); .build();
*/
// https://stackoverflow.com/questions/24882927/using-streams-to-convert-a-list-of-objects-into-a-string-obtained-from-the-tostr // https://stackoverflow.com/questions/24882927/using-streams-to-convert-a-list-of-objects-into-a-string-obtained-from-the-tostr
public static SimpleMessageDTO transform(List<PpomppuArticle> articles) { public static SimpleMessageDTO transformToSimpleMessage(List<PpomppuArticle> articles) {
Instant requestedAt = Instant.now(); Instant requestedAt = Instant.now();
String body = articles.stream() String body = articles.stream()
.map(PpomppuArticleTransformer::convertToInlineMessage) .map(PpomppuArticleTransformer::convertToInlineMessage)
.collect(Collectors.joining("\n\n")); .collect(Collectors.joining("\n\n"));
return SimpleMessageDTO.builder() return SimpleMessageDTO.builder()
.requestedAt(requestedAt) .requestedAt(requestedAt)
.title(DATE_TIME_FORMATTER.format(requestedAt)) .title(DateUtil.DATE_TIME_FORMATTER.format(requestedAt))
.body(body) .body(body)
.build(); .build();
} }
public static BlockMessageDTO transformToBlockMessage(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
Instant requestedAt = Instant.now();
List<BlockMessageDTO.Block> body = articles.stream()
.map(e -> BlockMessageDTO.createBlock(convertToInlineMessage(e),
e.getThumbnailUrl()))
.collect(Collectors.toList());
return BlockMessageDTO.builder()
.requestedAt(requestedAt)
.title(String.format(TITLE_FORMAT_V1,
boardName.getMenuName(),
DateUtil.DATE_TIME_FORMATTER.format(requestedAt)))
.blocks(body)
.build();
}
public static String convertToInlineMessage(PpomppuArticle article) { public static String convertToInlineMessage(PpomppuArticle article) {
return String.format(MESSAGE_FORMAT_V1, return String.format(MESSAGE_FORMAT_V2,
article.getBoardName().getMenuName(), article.getTitle(), article.getArticleUrl()); article.getBoardName().getMenuName(), article.getArticleUrl(), article.getTitle());
} }
} }

View File

@ -0,0 +1,47 @@
package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * SlackBaseMessageBlock
 *
 * <p>Message block made of a block type and an optional text. Null properties are left out
 * of the JSON output.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-30
 */
@Getter
@NoArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class SlackBaseMessageBlock implements SlackMessageBlock {

    private static final long serialVersionUID = 1597984001727808419L;

    private SlackMessageBlockType type;
    private String text;

    @Builder
    private SlackBaseMessageBlock(SlackMessageBlockType type, String text) {
        this.type = type;
        this.text = text;
    }

    /** Creates a {@code MARKDOWN} block carrying the given message as its text. */
    public static SlackBaseMessageBlock ofMarkDown(String message) {
        return new SlackBaseMessageBlock(SlackMessageBlockType.MARKDOWN, message);
    }

    /** Creates a {@code DIVIDER} block without text. */
    public static SlackBaseMessageBlock ofDivider() {
        return new SlackBaseMessageBlock(SlackMessageBlockType.DIVIDER, null);
    }

    /** Returns the string value of the block type rather than the enum constant. */
    @Override
    public String getType() {
        return type.getType();
    }
}

View File

@ -0,0 +1,49 @@
package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * SlackImageMessageBlock
 *
 * <p>Message block describing an image through its URL and alternative text, serialized as
 * {@code image_url} and {@code alt_text}. Null properties are left out of the JSON output.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-30
 */
@Getter
@NoArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class SlackImageMessageBlock implements SlackMessageBlock {

    private static final long serialVersionUID = 1597984001727808419L;

    private SlackMessageBlockType type;

    @JsonProperty(value = "image_url", required = true)
    private String imageUrl;

    @JsonProperty(value = "alt_text", required = true)
    private String altText;

    @Builder
    private SlackImageMessageBlock(SlackMessageBlockType type, String imageUrl, String altText) {
        this.type = type;
        this.imageUrl = imageUrl;
        this.altText = altText;
    }

    /** Creates an {@code IMAGE} block for the given image URL and alternative text. */
    public static SlackImageMessageBlock of(String imageUrl, String altText) {
        return new SlackImageMessageBlock(SlackMessageBlockType.IMAGE, imageUrl, altText);
    }

    /** Returns the string value of the block type rather than the enum constant. */
    @Override
    public String getType() {
        return type.getType();
    }
}

View File

@ -0,0 +1,15 @@
package com.myoa.engineering.crawl.shopping.dto.slack;
import java.io.Serializable;
/**
 * SlackMessageBlock
 *
 * <p>Common contract of the serializable message blocks in this package; each block exposes
 * its type as a string.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-12-01
 */
public interface SlackMessageBlock extends Serializable {
/** Returns the type of this block as a string. */
String getType();
}

View File

@ -0,0 +1,22 @@
package com.myoa.engineering.crawl.shopping.dto.slack;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * SlackMessageBlockType
 *
 * <p>Block types used by the Slack message DTOs, each paired with the string value returned
 * by {@code getType()}.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-30
 */
@Getter
@AllArgsConstructor
public enum SlackMessageBlockType {
    SECTION("section"),
    MARKDOWN("mrkdwn"),
    DIVIDER("divider"),
    IMAGE("image"),
    ;

    // Enum constants are shared singletons, so their state must not be reassignable.
    private final String type;
}

View File

@ -1,19 +1,23 @@
package com.myoa.engineering.crawl.shopping.dto.slack; package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.myoa.engineering.crawl.shopping.support.dto.BlockMessageDTO;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import java.util.List;
/** /**
* SlackMessageDTO * SlackMessageDTO
*
* @author Shin Woo-jin (woo-jin.shin@linecorp.com) * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
* @since 2021-11-14 * @since 2021-11-14
*
*/ */
@Getter @Getter
@NoArgsConstructor @NoArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class SlackMessageDTO implements MessageDTO { public class SlackMessageDTO implements MessageDTO {
private final static long serialVersionUID = 4737608709660494713L; private final static long serialVersionUID = 4737608709660494713L;
@ -21,19 +25,37 @@ public class SlackMessageDTO implements MessageDTO {
private String text; private String text;
private String channel; private String channel;
private String username; private String username;
private List<SlackMessageBlock> blocks;
@JsonProperty("icon_emoji") @JsonProperty("icon_emoji")
private String iconEmoji; private String iconEmoji;
@Builder @Builder
public SlackMessageDTO(String text, String channel, String username, String iconEmoji) { public SlackMessageDTO(String text, String channel, String username,
List<SlackMessageBlock> blocks, String iconEmoji) {
this.text = text; this.text = text;
this.channel = channel; this.channel = channel;
this.username = username; this.username = username;
this.blocks = blocks;
this.iconEmoji = iconEmoji; this.iconEmoji = iconEmoji;
} }
public void applyText(String text) { public void applyText(String text) {
this.text = text; this.text = text;
} }
/**
 * Appends a markdown section block built from the given block DTO.
 *
 * <p>The image accessory is attached only when the DTO carries a non-blank image URL.
 * An image element without a URL is not a valid accessory, so text-only blocks are
 * added as plain sections instead.
 *
 * @param block source of the section text and the optional image URL / alt text
 */
public void addSectionBlock(BlockMessageDTO.Block block) {
    SlackSectionMessageBlock sectionBlock = SlackSectionMessageBlock.ofMarkDown(block.getText());
    String imageUrl = block.getImageUrl();
    if (imageUrl != null && !imageUrl.isBlank()) {
        sectionBlock.applyImageaccessory(imageUrl, block.getAltText());
    }
    addBlock(sectionBlock);
}
/**
 * Appends a markdown section block that contains only the given text.
 *
 * @param rawBlockMessage markdown text of the section
 */
public void addSectionBlock(String rawBlockMessage) {
    final SlackSectionMessageBlock sectionBlock = SlackSectionMessageBlock.ofMarkDown(rawBlockMessage);
    addBlock(sectionBlock);
}
/**
 * Appends a block to this message.
 *
 * <p>The block list is created on first use: the no-args constructor and a builder
 * call that omits {@code blocks} both leave the field null, which previously made
 * this method throw a NullPointerException.
 *
 * @param block block to append
 */
public void addBlock(SlackMessageBlock block) {
    if (blocks == null) {
        // Fully qualified so that no additional import is required.
        blocks = new java.util.ArrayList<>();
    }
    blocks.add(block);
}
} }

View File

@ -0,0 +1,48 @@
package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * Section block of a Slack message: a text block plus an optional image accessory.
 * Null properties are omitted when the block is serialized.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-30
 */
@Getter
@NoArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class SlackSectionMessageBlock implements SlackMessageBlock {

    private static final long serialVersionUID = -7600944576753160168L;

    private SlackMessageBlockType type;
    private SlackBaseMessageBlock text;
    private SlackImageMessageBlock accessory;

    @Builder
    private SlackSectionMessageBlock(SlackMessageBlockType type, SlackBaseMessageBlock text,
                                     SlackImageMessageBlock accessory) {
        this.type = type;
        this.text = text;
        this.accessory = accessory;
    }

    /**
     * Creates a block of type {@link SlackMessageBlockType#SECTION} whose text is the
     * markdown block built from {@code message}; no accessory is set.
     *
     * @param message markdown text of the section
     * @return the new section block
     */
    public static SlackSectionMessageBlock ofMarkDown(String message) {
        final SlackBaseMessageBlock markdownText = SlackBaseMessageBlock.ofMarkDown(message);
        return new SlackSectionMessageBlock(SlackMessageBlockType.SECTION, markdownText, null);
    }

    /**
     * Replaces the accessory with an image block built from the given values.
     *
     * @param imageUrl URL of the accessory image
     * @param altText  alternative text of the accessory image
     * @return this block, to allow chaining
     */
    public SlackSectionMessageBlock applyImageaccessory(String imageUrl, String altText) {
        final SlackImageMessageBlock image = SlackImageMessageBlock.of(imageUrl, altText);
        this.accessory = image;
        return this;
    }

    /**
     * Returns the string identifier of this block's type rather than the enum itself.
     */
    @Override
    public String getType() {
        final SlackMessageBlockType blockType = this.type;
        return blockType.getType();
    }
}

View File

@ -1,43 +0,0 @@
package com.myoa.engineering.crawl.shopping.infra.client;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
/**
 * MessageSenderAPIClient
 *
 * <p>Currently a Spring component without any active members: the WebClient-based
 * implementation (posting a message DTO to the sender API's Slack endpoint) is kept
 * below only as commented-out code.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-17
 */
@Slf4j
@Component
public class MessageSenderAPIClient {
// Disabled implementation, retained for reference only.
/*
private final WebClient webClient;
public MessageSenderAPIClient(WebClientProperties webClientProperties) {
WebClientPropertiesUnit webClientPropertiesUnit =
webClientProperties.find(WebClientPropertiesUnitName.PPOMPPU_NOTIFIER_SENDER_API.getUnitName());
this.webClient = WebClient.builder()
.baseUrl(webClientPropertiesUnit.getBaseUrl())
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofDefault())
.defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
// .filter(WebClientFilterFactory.logRequest())
// .filter(WebClientFilterFactory.logResponse())
.build();
}
public Mono<String> sendMessageToSlack(SimpleMessageDTO dto) {
return webClient.post()
.uri("/api/v1/messages/sendMessage/messengers/slack")
.bodyValue(dto)
.exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference<String>() {}))
.publishOn(Schedulers.boundedElastic())
.onErrorResume(WebClientRequestException.class, t -> {
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
return Mono.empty();
});
}
*/
}

View File

@ -1,41 +0,0 @@
package com.myoa.engineering.crawl.shopping.infra.client.slack;
import com.myoa.engineering.crawl.shopping.configuration.slack.properties.SlackSecretProperties;
import com.myoa.engineering.crawl.shopping.dto.slack.SlackMessageDTO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
/**
 * Creates {@link SlackMessageDTO} instances pre-filled with the channel, icon emoji
 * and username of the "shopping-crawler" Slack secret unit.
 */
@Slf4j
@Component
public class MongeShoppingBotSlackMessageSender {

    private static final String SLACK_SECRET_UNIT_NAME = "shopping-crawler";

    private final SlackSecretProperties.SlackSecretPropertiesUnit slackProperties;
    private final SlackAPIClient slackAPIClient;
    private final String token;

    /**
     * Looks up the secret unit named by {@link #SLACK_SECRET_UNIT_NAME} and keeps its token.
     *
     * @param slackAPIClient        Slack API client held by this sender
     * @param slackSecretProperties source of the secret unit
     */
    public MongeShoppingBotSlackMessageSender(SlackAPIClient slackAPIClient,
                                              SlackSecretProperties slackSecretProperties) {
        this.slackAPIClient = slackAPIClient;
        final SlackSecretProperties.SlackSecretPropertiesUnit unit =
                slackSecretProperties.find(SLACK_SECRET_UNIT_NAME);
        this.slackProperties = unit;
        this.token = unit.getToken();
    }

    /**
     * Builds a message carrying only the configured channel, icon emoji and username.
     *
     * @return a new message without text
     */
    public SlackMessageDTO ofMessageTemplate() {
        final String channel = slackProperties.getChannel();
        final String iconEmoji = slackProperties.getIconEmoji();
        final String username = slackProperties.getUsername();
        return SlackMessageDTO.builder()
                              .channel(channel)
                              .iconEmoji(iconEmoji)
                              .username(username)
                              .build();
    }

    /**
     * Builds a message like {@link #ofMessageTemplate()} and sets its text.
     *
     * @param text message text
     * @return a new message with the given text
     */
    public SlackMessageDTO ofMessage(String text) {
        final SlackMessageDTO message = ofMessageTemplate();
        message.applyText(text);
        return message;
    }
}

View File

@ -1,45 +0,0 @@
package com.myoa.engineering.crawl.shopping.infra.client.slack;
import lombok.extern.slf4j.Slf4j;
/**
 * SlackMessageSender
 *
 * <p>Currently an empty class: both the {@code MessageSender<SlackMessageDTO>}
 * implementation clause and the WebClient-based body (a POST to Slack's
 * {@code /chat.postMessage}) are commented out and kept for reference only.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
@Slf4j
public class SlackMessageSender { /* implements MessageSender<SlackMessageDTO> {*/
// Disabled implementation, retained for reference only.
/*
private static final String SLACK_API_URL = "https://slack.com/api";
private final WebClient webClient;
public SlackMessageSender(String apiSecret) {
this.webClient = WebClient.builder()
.baseUrl(SLACK_API_URL)
.defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + apiSecret)
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofDefault())
.defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_UTF8_VALUE)
.defaultHeader(HttpHeaders.ACCEPT_CHARSET, "UTF-8")
.filter(WebClientFilterFactory.logResponse())
.build();
}
@Override
public Mono<String> sendMessage(SlackMessageDTO message) {
return webClient.post()
.uri("/chat.postMessage")
.bodyValue(message)
.exchangeToMono(e -> e.bodyToMono(String.class))
.publishOn(Schedulers.boundedElastic())
.onErrorResume(WebClientRequestException.class, t -> {
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
return Mono.empty();
});
}
*/
}

View File

@ -4,7 +4,7 @@ import com.myoa.engineering.crawl.shopping.domain.entity.v2.Article;
import com.myoa.engineering.crawl.shopping.domain.model.v2.ArticleModel; import com.myoa.engineering.crawl.shopping.domain.model.v2.ArticleModel;
import com.myoa.engineering.crawl.shopping.event.ArticleUpsertEvent; import com.myoa.engineering.crawl.shopping.event.ArticleUpsertEvent;
import com.myoa.engineering.crawl.shopping.infra.repository.v2.ArticleRepository; import com.myoa.engineering.crawl.shopping.infra.repository.v2.ArticleRepository;
import jakarta.transaction.Transactional; import javax.transaction.Transactional;
import org.springframework.context.ApplicationEventPublisher; import org.springframework.context.ApplicationEventPublisher;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;

View File

@ -1,32 +0,0 @@
package com.myoa.engineering.crawl.shopping.service;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
 * MessageSenderService
 *
 * <p>Currently a Spring service without any active members: the methods that
 * delegated to {@code MessageSenderAPIClient} are commented out and kept for
 * reference only.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-21
 */
@Slf4j
@Service
public class MessageSenderService {
// Disabled implementation, retained for reference only.
/*
private final MessageSenderAPIClient messageSenderAPIClient;
public MessageSenderService(MessageSenderAPIClient messageSenderAPIClient) {
this.messageSenderAPIClient = messageSenderAPIClient;
}
public String sendMessageToSlack(PpomppuArticle article) {
return messageSenderAPIClient.sendMessageToSlack(PpomppuArticleTransformer.TRANSFORM_TO_MESSAGE_DTO.apply(article));
}
public String sendMessageToSlack(List<PpomppuArticle> articles) {
return messageSenderAPIClient.sendMessageToSlack(PpomppuArticleTransformer.transform(articles));
}
*/
}

View File

@ -48,7 +48,10 @@ public class PpomppuArticleService {
// save PpomppuBoardFeedStatus // save PpomppuBoardFeedStatus
Optional<PpomppuBoardFeedStatus> boardFeedStatus = ppomppuBoardFeedStatusRepository.findByBoardName(boardName); Optional<PpomppuBoardFeedStatus> boardFeedStatus = ppomppuBoardFeedStatusRepository.findByBoardName(boardName);
log.info("boardName: {}, isPresent?: {}", boardName, boardFeedStatus.isPresent()); log.info("[save] boardName: {}, isPresent?: {}, latestArticleId: {}",
boardName, boardFeedStatus.isPresent(), latestArticleId);
log.info("[save] articles count: {}, article ids: {}",
articles.size(), articles.stream().map(PpomppuArticle::getArticleId).toArray());
boardFeedStatus.ifPresentOrElse(e -> { boardFeedStatus.ifPresentOrElse(e -> {
if (latestArticleId.longValue() > 0L) { if (latestArticleId.longValue() > 0L) {
e.updateArticleId(latestArticleId); e.updateArticleId(latestArticleId);

View File

@ -1,57 +0,0 @@
package com.myoa.engineering.crawl.shopping.service;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * PpomppuFeedService
 *
 * <p>Currently a Spring component without any active members: the reactive
 * implementation (fetch a board's HTML, take the first {@code tbody} of the
 * {@code revolution_main_table} element and convert its {@code tr.list0} /
 * {@code tr.list1} rows to articles) is commented out and kept for reference only.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
@Slf4j
@Component
public class PpomppuFeedService {
// Disabled implementation, retained for reference only.
/*
private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever;
public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) {
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
}
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
final Mono<Element> tbody = extractTbodyFromHtml(html);
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
.map(e -> e.updateBoardName(boardName))
// .doOnNext(e -> log.info("parsed Result: {}", e))
.collectList();
}
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
return html.map(Jsoup::parse)
.mapNotNull(e -> e.getElementById("revolution_main_table"))
.map(e -> e.getElementsByTag("tbody"))
// .doOnNext(e -> log.info("tbody - {}", e.html()))
.map(e -> e.stream()
.findFirst()
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
}
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
}
private PpomppuArticle convertFromElement(Element element) {
return PpomppuArticleParser.toArticle(element.getElementsByTag("td"));
}
*/
}

View File

@ -2,11 +2,11 @@ spring:
config: config:
activate: activate:
on-profile: development on-profile: development
# import: import:
# - "configserver:http://192.168.0.100:20085" - "configserver:http://192.168.0.100:11080"
server: server:
port: 20080 port: 20081
# import: optional:configserver:http://localhost:11080 # lets the application start up even if the config server is not found.

View File

@ -25,4 +25,4 @@ management:
endpoints: endpoints:
web: web:
exposure: exposure:
include: refresh include: refresh,health

View File

@ -1,118 +1,6 @@
package com.myoa.engineering.crawl.shopping.crawlhandler.parser; package com.myoa.engineering.crawl.shopping.crawlhandler.parser;
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import com.myoa.engineering.crawl.shopping.util.DateTimeUtils;
import com.myoa.engineering.crawl.shopping.util.NumberUtils;
import com.myoa.engineering.crawl.shopping.util.TestDataUtils;
import io.micrometer.core.instrument.util.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import java.time.ZonedDateTime;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
class PpomppuArticleParserV2Test { class PpomppuArticleParserV2Test {
/**
 * Parses the fixture board page and prints the number of parsed articles
 * followed by the first two of them.
 */
@Test
void test1() {
    Document document = Jsoup.parse(TestDataUtils.fileToString("testdata/zboard/file1.html"));
    Elements rows = document.getElementById("revolution_main_table").getElementsByTag("tr");
    List<PpomppuArticle> parsed = rows.stream()
                                      .filter(row -> isRealArticle(row))
                                      .map(row -> parse(row))
                                      .toList();
    System.out.println(parsed.size());
    System.out.println(parsed.get(0));
    System.out.println(parsed.get(1));
}
/**
 * Tells whether a table row is an article row: it must have exactly six cells
 * and its first cell must contain digits only.
 */
private boolean isRealArticle(Element tr) {
    Elements cells = tr.getElementsByTag("td");
    return cells.size() == 6 && hasOnlyNumeric(cells.get(0));
}
// Matches text made of digits only; compiled once and shared instead of once per test instance.
static final Pattern pattern_numeric = Pattern.compile("\\d+");
/** Returns true when the whole text of the cell consists of one or more digits. */
private boolean hasOnlyNumeric(Element td) {
    final Matcher digits = pattern_numeric.matcher(td.text());
    return digits.matches();
}
/**
 * Converts an article row into a {@link PpomppuArticle}.
 *
 * <p>Cells are read by position: 0 = article id, 2 = title and link,
 * 3 = registration time, 4 = votes, 5 = hit count (0 is passed as the second
 * argument of {@code NumberUtils.parseInt}).
 *
 * @param tr table row of an article
 * @return the parsed article
 */
public PpomppuArticle parse(Element tr) {
    Elements cells = tr.getElementsByTag("td");
    Long articleId = Long.parseLong(cells.get(0).text());
    String title = cells.get(2).text();
    // The remaining values are computed in the same order as before: url, board, votes, hit, time.
    return PpomppuArticle.builder()
                         .articleId(articleId)
                         .title(title)
                         .articleUrl(parseArticleUrl(cells.get(2).getElementsByTag("a").attr("href")))
                         .boardName(parseBoardName(title))
                         .recommended(parseRecommended(cells.get(4)))
                         .hit(NumberUtils.parseInt(cells.get(5).text(), 0))
                         .registeredAt(DateTimeUtils.parse(cells.get(3).text()))
                         .build();
}
/** Parses the trimmed text of the cell as the article id. */
public Long parseArticleId(Element td) {
    final String rawId = td.text().trim();
    return Long.parseLong(rawId);
}
/**
 * Computes the net recommendation count from a cell whose text has the form
 * {@code "<up> - <down>"}.
 *
 * @param td vote cell
 * @return up votes minus down votes, or null when the cell text is empty
 */
public static Integer parseRecommended(Element td) {
    final String voteString = td.text();
    if (StringUtils.isEmpty(voteString)) {
        return null;
    }
    final String[] votes = voteString.split(" - ");
    return Integer.parseInt(votes[0]) - Integer.parseInt(votes[1]);
}
/** Turns the link's {@code href} value into a view-page URL via {@code PpomppuBoardName.ofViewPageUrl}. */
public static String parseArticleUrl(String data) {
    final String viewPageUrl = PpomppuBoardName.ofViewPageUrl(data);
    return viewPageUrl;
}
// Captures the content of each "[...]" group in a title; compiled once and shared instead of once per test instance.
static final Pattern patternBoardName = Pattern.compile("\\[(.+?)\\]");
/**
 * Resolves the board name from the last bracketed group of a title.
 *
 * @param fullTitle title that may contain several {@code [..]} groups
 * @return the result of {@code PpomppuBoardName.ofBoardName} for the last group
 *         (null is passed when the title has no group)
 */
public PpomppuBoardName parseBoardName(String fullTitle) {
    String lastMatched = patternBoardName.matcher(fullTitle)
                                         .results()
                                         .map(match -> match.group(1))
                                         .reduce((earlier, later) -> later)
                                         .orElse(null);
    return PpomppuBoardName.ofBoardName(lastMatched, true);
}
/** Prints the board name resolved from a title that contains several bracketed groups. */
@Test
void test2() {
    final String fullTitle = "[자사몰]푸마 메쉬 폼스트라이프 [에디션] 5종 [세트] (18,220원/무료)6 [의류/잡화]";
    PpomppuBoardName resolved = parseBoardName(fullTitle);
    System.out.println(resolved);
}
} }

View File

@ -0,0 +1,67 @@
package com.myoa.engineering.crawl.shopping.support.dto;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import java.io.Serializable;
import java.time.Instant;
import java.util.List;
/**
 * BlockMessageDTO
 *
 * <p>Message made of a list of {@link Block}s together with its board name,
 * title, URL and the published / requested timestamps.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-21
 */
@Getter
@NoArgsConstructor
public class BlockMessageDTO implements Serializable {

    private static final long serialVersionUID = -6992039884035135523L;

    private PpomppuBoardName boardName;
    private String title;
    private List<Block> blocks;
    private String url;
    private Instant publishedAt;
    private Instant requestedAt;

    @Builder
    public BlockMessageDTO(PpomppuBoardName boardName, String title,
                           List<Block> blocks, String url, Instant publishedAt,
                           Instant requestedAt) {
        this.boardName = boardName;
        this.title = title;
        this.blocks = blocks;
        this.url = url;
        this.publishedAt = publishedAt;
        this.requestedAt = requestedAt;
    }

    /**
     * One block of the message: a text with an image URL and the image's alternative text.
     */
    @Getter
    @NoArgsConstructor
    public static class Block implements Serializable {

        private static final long serialVersionUID = 3633781631892663709L;

        private String text;
        private String imageUrl;
        private String altText;

        public Block(String text, String imageUrl, String altText) {
            this.text = text;
            this.imageUrl = imageUrl;
            this.altText = altText;
        }
    }

    /**
     * Creates a block whose alternative text is the empty string.
     *
     * @param text     block text
     * @param imageUrl image URL of the block
     * @return the new block
     */
    public static Block createBlock(String text, String imageUrl) {
        return createBlock(text, imageUrl, "");
    }

    /**
     * Creates a block from the given values.
     *
     * @param text     block text
     * @param imageUrl image URL of the block
     * @param altText  alternative text of the image
     * @return the new block
     */
    public static Block createBlock(String text, String imageUrl, String altText) {
        return new Block(text, imageUrl, altText);
    }
}

View File

@ -0,0 +1,18 @@
package com.myoa.engineering.crawl.shopping.support.util;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
/**
 * DateUtil
 *
 * <p>Holder of the shared date/time formatter; not instantiable.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2022-01-02
 */
public final class DateUtil {

    /** Formats temporal values as {@code yy.MM.dd HH:mm:ss} in the Asia/Seoul time zone. */
    public static final DateTimeFormatter DATE_TIME_FORMATTER =
            DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss").withZone(ZoneId.of("Asia/Seoul"));

    /** Prevents instantiation; the class only exposes constants. */
    private DateUtil() {
    }
}
}

View File

@ -2,6 +2,7 @@ package com.myoa.engineering.crawl.shopping.support.util;
import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser.Feature; import com.fasterxml.jackson.core.JsonParser.Feature;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.databind.SerializationFeature;
@ -58,6 +59,15 @@ public final class ObjectMapperFactory {
return objectMapper; return objectMapper;
} }
/**
 * Serializes the given object to a JSON string with the mapper returned by
 * {@code defaultMapper()}.
 *
 * @param o object to serialize
 * @return the JSON text, or null when serialization fails (the stack trace is printed)
 */
public static String writeAsString(Object o) {
    String json = null;
    try {
        json = defaultMapper().writeValueAsString(o);
    } catch (JsonProcessingException e) {
        // NOTE(review): failures are only printed and reported as null; callers must null-check.
        e.printStackTrace();
    }
    return json;
}
/** /**
* Copy from {@link GenericJackson2JsonRedisSerializer.NullValueSerializer}. * Copy from {@link GenericJackson2JsonRedisSerializer.NullValueSerializer}.