Add Fake430 resolver

This commit is contained in:
woozu-shin 2024-05-14 17:41:29 +09:00
parent dae3dd52e4
commit b83db38b61
13 changed files with 289 additions and 42 deletions

View File

@ -24,6 +24,13 @@ dependencies {
implementation 'org.ahocorasick:ahocorasick:0.6.3'
implementation "com.slack.api:slack-api-client:1.39.1"
// implementation "io.github.resilience4j:resilience4j-spring-boot3:2.2.0"
implementation 'io.github.resilience4j:resilience4j-all:2.2.0'
implementation "io.github.resilience4j:resilience4j-feign:2.2.0"
implementation "org.springframework.cloud:spring-cloud-starter-circuitbreaker-resilience4j"
// implementation 'io.github.openfeign:feign-okhttp:13.1'
implementation 'io.github.openfeign:feign-jackson:13.2'
annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor'
annotationProcessor 'org.projectlombok:lombok'

View File

@ -1,6 +1,7 @@
package com.myoa.engineering.crawl.shopping.configuration.feign;
import feign.RequestInterceptor;
import io.github.resilience4j.circuitbreaker.annotation.CircuitBreaker;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@ -9,7 +10,26 @@ public class FmkoreaClientFeignConfiguration {
/**
 * Exposes the Feign {@link RequestInterceptor} used by this client configuration.
 *
 * <p>Every outgoing request is handed to a newly constructed
 * {@code FakeUserAgentInterceptor}, which mutates the request template in place.
 *
 * @return interceptor that delegates each request to a fresh {@code FakeUserAgentInterceptor}
 */
@Bean
public RequestInterceptor requestInterceptor() {
    // TODO ignore 4xx
    return template -> {
        FakeUserAgentInterceptor delegate = new FakeUserAgentInterceptor();
        delegate.apply(template);
    };
}
/*
@Bean
public FmkoreaBoardClient fmkoreaBoardClient(RateLimiterRegistry rateLimiterRegistry,
CircuitBreakerRegistry circuitBreakerRegistry,
RequestInterceptor requestInterceptor) {
FeignDecorators decorators = FeignDecorators.builder()
.withCircuitBreaker(circuitBreakerRegistry.circuitBreaker("rateLimit"))
.withRateLimiter(rateLimiterRegistry.rateLimiter("rateLimit"))
.build();
return Resilience4jFeign.builder(decorators)
.requestInterceptor(requestInterceptor)
.target(FmkoreaBoardClient.class, "https://www.fmkorea.com");
}
*/
}

View File

@ -0,0 +1,52 @@
package com.myoa.engineering.crawl.shopping.configuration.resilience;
import io.github.resilience4j.circuitbreaker.CircuitBreaker;
import io.github.resilience4j.circuitbreaker.CircuitBreakerConfig;
import io.github.resilience4j.circuitbreaker.CircuitBreakerRegistry;
import io.github.resilience4j.core.RegistryStore;
import io.github.resilience4j.core.registry.InMemoryRegistryStore;
import io.github.resilience4j.ratelimiter.RateLimiter;
import io.github.resilience4j.ratelimiter.RateLimiterConfig;
import io.github.resilience4j.ratelimiter.RateLimiterRegistry;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.time.Duration;
/**
 * Spring configuration that publishes Resilience4j registries pre-populated with the
 * {@code fmkoreaAvoid429} rate limiter and circuit breaker.
 *
 * <p>Instances that are not pre-registered here fall back to the library default configs.
 */
@Configuration
public class RateLimitConfiguration {

    /** Name under which both the rate limiter and the circuit breaker are registered. */
    private static final String FMKOREA_INSTANCE_NAME = "fmkoreaAvoid429";

    /**
     * Builds a rate limiter registry holding one pre-registered limiter that grants a single
     * permit every 500 ms.
     *
     * @return registry backed by an in-memory store
     */
    @Bean
    public RateLimiterRegistry rateLimiterRegistry() {
        // TODO: change this so that each client registers its limiter from its own config
        RateLimiterConfig fmkoreaLimiterConfig = RateLimiterConfig.custom()
                .limitForPeriod(1) // number of permits in a refresh period
                .limitRefreshPeriod(Duration.ofMillis(500)) // 0.5 seconds
                .build();

        RegistryStore<RateLimiter> limiterStore = new InMemoryRegistryStore<>();
        limiterStore.putIfAbsent(
                FMKOREA_INSTANCE_NAME,
                RateLimiter.of(FMKOREA_INSTANCE_NAME, fmkoreaLimiterConfig));

        return RateLimiterRegistry.custom()
                .withRateLimiterConfig(RateLimiterConfig.ofDefaults())
                .withRegistryStore(limiterStore)
                .build();
    }

    /**
     * Builds a circuit breaker registry holding one pre-registered breaker whose sliding
     * window size is 1.
     *
     * @return registry backed by an in-memory store
     */
    @Bean
    public CircuitBreakerRegistry circuitBreakerRegistry() {
        CircuitBreakerConfig fmkoreaBreakerConfig = CircuitBreakerConfig.custom()
                .slidingWindowSize(1)
                .build();

        RegistryStore<CircuitBreaker> breakerStore = new InMemoryRegistryStore<>();
        breakerStore.putIfAbsent(
                FMKOREA_INSTANCE_NAME,
                CircuitBreaker.of(FMKOREA_INSTANCE_NAME, fmkoreaBreakerConfig));

        return CircuitBreakerRegistry.custom()
                .withCircuitBreakerConfig(CircuitBreakerConfig.ofDefaults())
                .withRegistryStore(breakerStore)
                .build();
    }
}

View File

@ -1,28 +1,36 @@
package com.myoa.engineering.crawl.shopping.controller;
import com.myoa.engineering.crawl.shopping.crawlhandler.CrawlHandler;
import com.myoa.engineering.crawl.shopping.infra.client.fmkorea.FmkoreaBoardClient;
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
import com.slack.api.methods.MethodsClient;
import com.slack.api.methods.SlackApiException;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Slf4j
@RestController
@RequestMapping("/api/v1/exploit")
public class TestAPIController {
private final MethodsClient methodsClient;
private final List<CrawlHandler> crawlHandlers;
private final FmkoreaBoardClient fmkoreaBoardClient;
public TestAPIController(MethodsClient methodsClient, List<CrawlHandler> crawlHandlers) {
public TestAPIController(MethodsClient methodsClient,
List<CrawlHandler> crawlHandlers,
FmkoreaBoardClient fmkoreaBoardClient) {
this.methodsClient = methodsClient;
this.crawlHandlers = crawlHandlers;
this.fmkoreaBoardClient = fmkoreaBoardClient;
}
@GetMapping("/triggers")
@ -32,6 +40,28 @@ public class TestAPIController {
.forEach(CrawlHandler::handle);
}
/**
 * Manual trigger for exercising the fmkorea client: requests page 1 of the {@code hotdeal}
 * board via {@code /index.php} and logs before and after the call.
 *
 * <p>The response body is discarded. The calls for pages 2 and 3 are currently commented out.
 */
@GetMapping("/ratelimiter")
public void triggerExploit() {
log.info("will be called page 1");
fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(1));
log.info("called page 1");
// log.info("will be called page 2");
// fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(2));
// log.info("called page 2");
//
// log.info("will be called page 3");
// fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(3));
// log.info("called page 3");
}
/**
 * Builds the query parameters that select one page of the {@code hotdeal} board.
 *
 * @param pageId page number to request
 * @return mutable map containing the {@code mid} and {@code page} parameters
 */
private Map<String, String> generateRequestParams(int pageId) {
    Map<String, String> queryParams = new HashMap<>();
    queryParams.put("mid", "hotdeal");
    queryParams.put("page", Integer.toString(pageId));
    return queryParams;
}
@GetMapping("/test-message")
public void testMessage() throws SlackApiException, IOException {
methodsClient.chatPostMessage(req -> req

View File

@ -36,10 +36,14 @@ public class FmkoreaCrawlHandler implements CrawlHandler {
@Override
public void handle() {
String boardHtmlPage1 = fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(1));
String fakeHtml = fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(1, null));
String cookie = FmkoreaFake430Resolver.resolveFake430(fakeHtml);
String boardHtmlPage1 = fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(1, cookie));
List<Article> parsedPage1 = fmkoreaArticleParser.parse(boardHtmlPage1);
String boardHtmlPage2 = fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(2));
String boardHtmlPage2 = fmkoreaBoardClient.getBoardHtml("/index.php", generateRequestParams(2, cookie));
List<Article> parsedPage2 = fmkoreaArticleParser.parse(boardHtmlPage2);
List<Article> merged = Stream.of(parsedPage1, parsedPage2)
@ -50,10 +54,11 @@ public class FmkoreaCrawlHandler implements CrawlHandler {
articleCommandService.upsert(merged);
}
private Map<String, String> generateRequestParams(int pageId) {
private Map<String, String> generateRequestParams(int pageId, String cookie) {
Map<String, String> params = new HashMap<>();
params.put("mid", "hotdeal");
params.put("page", String.valueOf(pageId));
params.put("Cookie", cookie);
return params;
}
}

View File

@ -0,0 +1,88 @@
package com.myoa.engineering.crawl.shopping.crawlhandler;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.CollectionType;
import com.myoa.engineering.crawl.shopping.util.ObjectMapperFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Base64;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Derives the {@code lite_year} cookie from fmkorea's "fake 430" challenge page.
 *
 * <p>The resolver looks in the page's {@code <script>} content for a {@code var x = [...];}
 * array of strings. Each string is Base64 text wrapped in three filler characters on both
 * sides; the first decoded byte of each chunk, shifted down by three modulo 256, yields one
 * character of a decoded text. That text is expected to contain an {@code escape('...')} call
 * whose argument becomes the cookie value.
 *
 * <p>This class is stateless and exposes only static methods.
 */
public final class FmkoreaFake430Resolver {

    /** Matches the first {@code var name = [ ... ];} assignment; group 1 is the array literal only. */
    private static final Pattern PATTERN_DOCUMENT = Pattern.compile("var .+?\\s*=\\s*(\\[.+?\\]);");
    /** Matches {@code escape('value')}; group 1 is the quoted value. */
    private static final Pattern PATTERN_COOKIE = Pattern.compile("escape\\('(.+?)'\\)");
    private static final ObjectMapper MAPPER = ObjectMapperFactory.DEFAULT_MAPPER;
    private static final CollectionType COLLECTION_TYPE =
            MAPPER.getTypeFactory().constructCollectionType(List.class, String.class);
    private static final DateTimeFormatter DATE_TIME_FORMATTER_COOKIE_LIFE =
            DateTimeFormatter.ofPattern("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.ENGLISH);

    /** Number of filler characters wrapped around the Base64 payload on each side of a chunk. */
    private static final int CHUNK_PADDING = 3;
    /** Amount subtracted (mod 256) from the first decoded byte of each chunk. */
    private static final int BYTE_SHIFT = 3;

    private FmkoreaFake430Resolver() {
    }

    /**
     * Extracts the challenge cookie from the given challenge page.
     *
     * @param fakeHtml HTML of the challenge page; may be {@code null}
     * @return the cookie string produced by {@link #generateLiteTimeCookie(String)}, or
     *         {@code null} when the input is {@code null} or the page does not contain a
     *         decodable challenge (no script array, malformed chunk, no {@code escape(...)} value)
     */
    public static String resolveFake430(String fakeHtml) {
        if (fakeHtml == null) {
            return null;
        }
        Document document = Jsoup.parse(fakeHtml);
        String javascript = document.select("script").html();
        String cookieHtml = extractEncodedCookieHtml(javascript);
        if (cookieHtml.isEmpty()) {
            return null;
        }
        try {
            List<String> chunks = MAPPER.readValue(cookieHtml, COLLECTION_TYPE);
            String decodedHtml = decodeHtmlChunks(chunks);
            String cookieHexValue = extractCookieHexValue(decodedHtml);
            if (cookieHexValue.isEmpty()) {
                // Without a value the cookie would be "lite_year=; ..." - report failure instead.
                return null;
            }
            return generateLiteTimeCookie(cookieHexValue);
        } catch (Exception ignored) {
            // Best effort: any parsing/decoding problem means the page is not a resolvable
            // challenge, which callers observe as a null cookie.
            return null;
        }
    }

    /**
     * Returns the JSON array literal assigned in the first {@code var ... = [...];} statement,
     * or an empty string when the script contains no such statement.
     */
    private static String extractEncodedCookieHtml(String javascript) {
        final Matcher matcher = PATTERN_DOCUMENT.matcher(javascript);
        return matcher.find() ? matcher.group(1) : "";
    }

    /**
     * Decodes the obfuscated chunks into text, one character per chunk.
     *
     * <p>Works on the raw decoded byte rather than on a {@code String} built with the platform
     * default charset, so bytes above 0x7F decode identically on every JVM.
     *
     * @throws RuntimeException if a chunk is {@code null}, shorter than the two fillers, not
     *         valid Base64, or decodes to zero bytes
     */
    private static String decodeHtmlChunks(List<String> chunks) {
        StringBuilder decoded = new StringBuilder(chunks.size());
        for (String chunk : chunks) {
            String payload = chunk.substring(CHUNK_PADDING, chunk.length() - CHUNK_PADDING);
            byte[] bytes = Base64.getDecoder().decode(payload);
            int unshifted = ((bytes[0] & 0xFF) - BYTE_SHIFT + 256) % 256;
            decoded.append((char) unshifted);
        }
        return decoded.toString();
    }

    /**
     * Returns the argument of the first {@code escape('...')} call in the decoded text, or an
     * empty string when there is none.
     */
    private static String extractCookieHexValue(String decodedHtml) {
        final Matcher matcher = PATTERN_COOKIE.matcher(decodedHtml);
        return matcher.find() ? matcher.group(1) : "";
    }

    /**
     * Percent-encodes every character that is not a letter, a digit, or one of {@code - _ . ~}.
     *
     * <p>NOTE(review): this is not a full port of JavaScript's {@code escape()}; it is only
     * equivalent for values that need no escaping (presumably the hex token - confirm).
     */
    private static String escape(String input) {
        StringBuilder result = new StringBuilder();
        for (char ch : input.toCharArray()) {
            if (Character.isLetterOrDigit(ch) || ch == '-' || ch == '_' || ch == '.' || ch == '~') {
                result.append(ch);
            } else {
                result.append(String.format("%%%02X", (int) ch));
            }
        }
        return result.toString();
    }

    /**
     * Formats the {@code lite_year} cookie string with an expiry one day from now.
     *
     * <p>The expiry is computed in UTC because the formatter appends a literal {@code GMT}
     * suffix; using the system zone would mislabel the timestamp on non-UTC hosts.
     *
     * @param cookieHexValue cookie value taken from the challenge script; must not be {@code null}
     * @return {@code lite_year=<value>; expires=<RFC 1123 style date> GMT; path=/}
     * @throws NullPointerException if {@code cookieHexValue} is {@code null}
     */
    public static String generateLiteTimeCookie(String cookieHexValue) {
        LocalDateTime expiresAtUtc = LocalDateTime.now(ZoneOffset.UTC).plusDays(1L);
        return "lite_year=" + escape(cookieHexValue)
                + "; expires=" + expiresAtUtc.format(DATE_TIME_FORMATTER_COOKIE_LIFE)
                + "; path=/";
    }
}

View File

@ -1,6 +1,7 @@
package com.myoa.engineering.crawl.shopping.domain.model;
import com.myoa.engineering.crawl.shopping.domain.model.v2.ArticleModel;
import com.slack.api.methods.response.chat.ChatPostMessageResponse;
import lombok.*;
import java.util.List;
@ -15,13 +16,7 @@ import java.util.stream.Collectors;
public class UserNotifyModel {
private String slackId;
private List<ArticleModel> articles;
public static UserNotifyModel of(String slackId, List<ArticleModel> articles) {
return UserNotifyModel.builder()
.slackId(slackId)
.articles(articles)
.build();
}
private ChatPostMessageResponse chatPostMessageResponse;
public String toCompositedMessage() {
return wrapUserId() + "\n" +

View File

@ -9,6 +9,8 @@ import com.myoa.engineering.crawl.shopping.service.AppUserQueryService;
import com.myoa.engineering.crawl.shopping.service.SubscribedKeywordCacheService;
import com.myoa.engineering.crawl.shopping.service.slack.UserNotifyService;
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
import com.slack.api.methods.request.chat.ChatPostMessageRequest;
import com.slack.api.methods.response.chat.ChatPostMessageResponse;
import org.springframework.context.event.EventListener;
import org.springframework.stereotype.Component;
@ -38,29 +40,35 @@ public class ArticleUpsertEventListener {
Map<CrawlTarget, List<ArticleModel>> articleMap =
((List<ArticleModel>) event.getSource()).stream()
.collect(Collectors.groupingBy(ArticleModel::getCrawlTarget));
articleMap.forEach(this::notifyMessage);
Map<CrawlTarget, ChatPostMessageResponse> allArticleNotifiedResultMap =
articleMap.entrySet()
.stream()
.collect(Collectors.toMap(Map.Entry::getKey, e -> notifyMessage(e.getKey(), e.getValue())));
List<AppUserModel> appUsers = appUserQueryService.findAll();
appUsers.stream()
.filter(AppUserModel::getEnabled)
.map(user -> {
List<ArticleModel> filteredArticles = handleAhoCorasick(articleMap)
.apply(subscribedKeywordCacheService.getSubscribedKeywordsCached(user.getSlackId()));
return UserNotifyModel.of(user.getSlackId(), filteredArticles);
.flatMap(user -> {
Map<CrawlTarget, SubscribedKeywordAggregatedModel> subscribedKeywords =
subscribedKeywordCacheService.getSubscribedKeywordsCached(user.getSlackId());
return subscribedKeywords.entrySet()
.stream()
.map(entry -> {
List<ArticleModel> filtered = doAhoCorasick(articleMap.get(entry.getKey())).apply(entry.getValue());
return UserNotifyModel.builder()
.slackId(user.getSlackId())
.articles(filtered)
.chatPostMessageResponse(allArticleNotifiedResultMap.get(entry.getKey()))
.build();
});
})
.forEach(this::notifyMessage);
}
private Function<Map<CrawlTarget, SubscribedKeywordAggregatedModel>, List<ArticleModel>> handleAhoCorasick(
Map<CrawlTarget, List<ArticleModel>> articleMap) {
return userTrieModel -> userTrieModel
.entrySet()
.stream().filter(e -> articleMap.containsKey(e.getKey()))
.map((entry) -> filterAhocorasick(articleMap.get(entry.getKey()), entry.getValue()))
.flatMap(List::stream)
.toList();
private Function<SubscribedKeywordAggregatedModel, List<ArticleModel>> doAhoCorasick(
List<ArticleModel> articles) {
return userTrieModel -> filterAhocorasick(articles, userTrieModel);
}
private List<ArticleModel> filterAhocorasick(List<ArticleModel> articles,
@ -70,15 +78,16 @@ public class ArticleUpsertEventListener {
.parseText(article.getTitle())
.isEmpty())
.toList();
//ArticleUpsertEventListener::printArticle
}
private void notifyMessage(CrawlTarget crawlTarget, List<ArticleModel> articles) {
private ChatPostMessageResponse notifyMessage(CrawlTarget crawlTarget, List<ArticleModel> articles) {
var sb = new StringBuilder();
sb.append("[").append(crawlTarget.getAlias()).append("]\n");
articles.forEach(article -> sb.append(article.convertArticletoMessage()).append("\n"));
sb.append("-----------------------------------\n");
userNotifyService.notify(sb.toString());
ChatPostMessageRequest request = userNotifyService.generateMessage(sb.toString()).build();
return userNotifyService.notify(request);
}
private void notifyMessage(UserNotifyModel userNotifyModel) {
@ -86,7 +95,11 @@ public class ArticleUpsertEventListener {
if (userNotifyModel.getArticles().isEmpty()) {
return;
}
userNotifyService.notify(userNotifyModel.toCompositedMessage());
ChatPostMessageRequest request = userNotifyService.generateMessage(userNotifyModel.toCompositedMessage())
.threadTs(userNotifyModel.getChatPostMessageResponse().getTs())
.build();
userNotifyService.notify(request);
}
}

View File

@ -1,6 +1,8 @@
package com.myoa.engineering.crawl.shopping.infra.client.fmkorea;
import com.myoa.engineering.crawl.shopping.configuration.feign.FmkoreaClientFeignConfiguration;
import io.github.resilience4j.circuitbreaker.annotation.CircuitBreaker;
import io.github.resilience4j.ratelimiter.annotation.RateLimiter;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.cloud.openfeign.SpringQueryMap;
import org.springframework.web.bind.annotation.GetMapping;
@ -8,10 +10,11 @@ import org.springframework.web.bind.annotation.PathVariable;
import java.util.Map;
@FeignClient(value = "fmkorea-board-client", url = "https://www.fmkorea.com",
configuration = FmkoreaClientFeignConfiguration.class)
@FeignClient(value = "fmkorea-board-client", url = "https://www.fmkorea.com", configuration = FmkoreaClientFeignConfiguration.class)
public interface FmkoreaBoardClient {
@CircuitBreaker(name = "fmkoreaAvoid429")
@RateLimiter(name = "fmkoreaAvoid429")
@GetMapping("{boardLink}")
String getBoardHtml(@PathVariable("boardLink") String boardLink,
@SpringQueryMap Map<String, String> params);

View File

@ -19,12 +19,12 @@ public class SubscribedKeywordCacheService {
this.subscribedKeywordQueryService = subscribedKeywordQueryService;
}
@Cacheable(cacheNames = "subscribe.keywords", key = "#userId + '_' + #crawlTarget.name()")
public SubscribedKeywordAggregatedModel getSubscribedKeywordsCached(String userId, CrawlTarget crawlTarget) {
@Cacheable(cacheNames = "subscribe.keywords", key = "#slackId + '_' + #crawlTarget.name()")
public SubscribedKeywordAggregatedModel getSubscribedKeywordsCached(String slackId, CrawlTarget crawlTarget) {
System.out.println("getSubscribedKeywordsCached");
List<String> keywords = subscribedKeywordQueryService.findByUserWithTarget(userId, crawlTarget)
List<String> keywords = subscribedKeywordQueryService.findByUserWithTarget(slackId, crawlTarget)
.stream().map(SubscribedKeyword::getKeyword).toList();
return SubscribedKeywordAggregatedModel.of(userId, crawlTarget, keywords);
return SubscribedKeywordAggregatedModel.of(slackId, crawlTarget, keywords);
}
@Cacheable(cacheNames = "subscribe.keywords", key = "#slackId")

View File

@ -2,6 +2,8 @@ package com.myoa.engineering.crawl.shopping.service.slack;
import com.myoa.engineering.crawl.shopping.configuration.slack.properties.SlackSecretProperties;
import com.slack.api.methods.MethodsClient;
import com.slack.api.methods.request.chat.ChatPostMessageRequest;
import com.slack.api.methods.response.chat.ChatPostMessageResponse;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
@ -20,14 +22,21 @@ public class UserNotifyService {
this.methodsClient = methodsClient;
}
public void notify(String message) {
public ChatPostMessageResponse notify(ChatPostMessageRequest request) {
try {
methodsClient.chatPostMessage(req -> req
.channel(slackSecretProperties.getChannel())
.username(slackSecretProperties.getUsername())
.text(message));
return methodsClient.chatPostMessage(request);
} catch (Exception e) {
log.warn("Failed. message: {}", message, e);
log.warn("Failed. message: {}", request, e);
ChatPostMessageResponse response = new ChatPostMessageResponse();
response.setOk(false);
return response;
}
}
/**
 * Starts a chat-post request builder pre-filled with the configured channel and username
 * and the given text, leaving it open so callers can set further options before building.
 *
 * @param message text of the message
 * @return an unbuilt request builder
 */
public ChatPostMessageRequest.ChatPostMessageRequestBuilder generateMessage(String message) {
    String channel = slackSecretProperties.getChannel();
    String username = slackSecretProperties.getUsername();
    return ChatPostMessageRequest.builder()
            .channel(channel)
            .username(username)
            .text(message);
}
}

View File

@ -0,0 +1,16 @@
package com.myoa.engineering.crawl.shopping.crawlhandler;
import com.myoa.engineering.crawl.shopping.util.TestDataUtils;
import org.junit.jupiter.api.Test;
class FmkoreaCrawlHandlerTest {
@Test
void resolve_fake430() {
String fakeHtml = TestDataUtils.fileToString("testdata/fmkorea/fake430.html");
FmkoreaFake430Resolver.resolveFake430(fakeHtml);
}
}

File diff suppressed because one or more lines are too long