Skip to content

Commit

Permalink
openeuler新增sig、etherpad数据
Browse files Browse the repository at this point in the history
  • Loading branch information
2511689622 committed Dec 13, 2024
1 parent d576d8e commit 374bac4
Show file tree
Hide file tree
Showing 8 changed files with 414 additions and 20 deletions.
10 changes: 10 additions & 0 deletions openeuler/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@
<artifactId>es-client</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.17.3</version>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>

</dependencies>

Expand Down
81 changes: 77 additions & 4 deletions openeuler/src/main/java/App.java
Original file line number Diff line number Diff line change
@@ -1,35 +1,108 @@
import java.io.File;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.*;
import java.util.stream.Collectors;
import etherpad.EPLiteClient;

public class App {
// Value of the TARGET environment variable.
private static final String TARGET = System.getenv("TARGET");

// Folder that sigData() scans for per-sig sub-directories containing *.yaml files.
// NOTE(review): if TARGET is unset this evaluates to "null/sig"; sigData() then
// reports the folder as missing.
private static final String TARGET_SIG = System.getenv("TARGET") + "/sig";

// Passed to PublicClient.CreateClientFormConfig in main().
private static final String APPLICATION_PATH = System.getenv("APPLICATION_PATH");

// Passed to PublicClient.makeIndex for both the "_zh" and "_en" indices.
private static final String MAPPING_PATH = System.getenv("MAPPING_PATH");

// Prefix handed to Parse.parseSigYaml, which prepends it to the "path" of each sig document.
private static final String SIG_PATH = System.getenv("SIG_PATH");

// Handed to Parse.parseEtherPad, which embeds it in the "path" of each pad document.
private static final String ETHERPAD_PATH = System.getenv("ETHERPAD_PATH");

// Index name prefix; a language suffix ("_zh", "_en", or "_" + the document's "lang") is appended.
private static final String INDEX_PREFIX = "openeuler_articles";

// URL given to the EPLiteClient constructor in etherpadData().
private static final String ETHERPAD_URL = System.getenv("ETHERPAD_URL");

// API key given to the EPLiteClient constructor in etherpadData(); treat as a secret and never log it.
private static final String ETHERPAD_KEY = System.getenv("ETHERPAD_KEY");

private static final Logger logger = LoggerFactory.getLogger(App.class);

/**
 * Entry point of the import job.
 *
 * <p>Creates the client from {@code APPLICATION_PATH}, makes the "_zh" and "_en"
 * indices from {@code MAPPING_PATH}, then runs the sig, etherpad and file imports
 * in that order. Any exception raised by one of these steps stops the remaining
 * steps and is logged here.
 *
 * <p>The process always terminates with exit status 0, including after a failure.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    try {
        PublicClient.CreateClientFormConfig(APPLICATION_PATH);
        PublicClient.makeIndex(INDEX_PREFIX + "_zh", MAPPING_PATH);
        PublicClient.makeIndex(INDEX_PREFIX + "_en", MAPPING_PATH);
        sigData();
        etherpadData();
        fileDate();
    } catch (Exception e) {
        // Log once, passing the throwable so the exception type, message and
        // stack trace are all recorded instead of only the message text.
        logger.error("import failed: {}", e.toString(), e);
    }

    logger.info("import end");
    System.exit(0);
}

/**
 * Imports sig information into the search index.
 *
 * <p>Scans every direct sub-directory of {@code TARGET_SIG} for {@code *.yaml}
 * files (non-recursively), parses each one with {@link Parse#parseSigYaml} and
 * inserts the resulting document into the index named
 * {@code INDEX_PREFIX + "_" + lang}. A failure on one file is logged and does not
 * stop the remaining files.
 *
 * @throws Exception declared for callers; per-file failures are caught and logged
 */
public static void sigData() throws Exception {
    File indexFile = new File(TARGET_SIG);
    if (!indexFile.exists()) {
        // SLF4J substitutes "{}" only; printf-style "%s"/"%n" would be printed literally.
        logger.info("{} folder does not exist", indexFile.getPath());
        return;
    }

    logger.info("begin to update sig data");
    // listFiles returns null when the path is not a directory or cannot be read.
    File[] subdirectories = indexFile.listFiles(File::isDirectory);
    if (subdirectories == null) {
        logger.info("sig data is null");
        return;
    }

    Collection<File> listFiles = new ArrayList<>();
    for (File subdir : subdirectories) {
        listFiles.addAll(FileUtils.listFiles(subdir, new String[]{"yaml"}, false));
    }

    for (File paresFile : listFiles) {
        try {
            // Only the "zh" variant of the sig document is produced here.
            Map<String, Object> escape = Parse.parseSigYaml(paresFile, "zh", SIG_PATH);
            // An empty map carries no "lang" and would otherwise be written to
            // the index "<prefix>_null", so treat it like a failed parse.
            if (escape == null || escape.isEmpty()) {
                logger.info("parse null : {}", paresFile.getPath());
                continue;
            }
            PublicClient.insert(escape, INDEX_PREFIX + "_" + escape.get("lang"));
        } catch (Exception e) {
            logger.error(paresFile.getPath());
            logger.error("sig data imported error {}", e.getMessage());
        }
    }
    logger.info("sig data imported end");
}

/**
 * Imports the text of every Etherpad pad into the search index.
 *
 * <p>Lists all pads through {@link EPLiteClient#listAllPads()}, fetches each pad's
 * text, converts it with {@link Parse#parseEtherPad} and inserts the document into
 * the index named {@code INDEX_PREFIX + "_" + lang}. A failure on one pad
 * (fetching, parsing or inserting) is logged and does not stop the remaining pads.
 * A response without a {@code padIDs} list ends the import without an exception.
 */
public static void etherpadData() {
    EPLiteClient client = new EPLiteClient(ETHERPAD_URL, ETHERPAD_KEY);

    Map<?, ?> result = client.listAllPads();
    Object padIdsObj = result == null ? null : result.get("padIDs");
    if (!(padIdsObj instanceof List)) {
        // Without this guard a missing "padIDs" entry raises a NullPointerException
        // that propagates to main() and skips the file import as well.
        logger.info("etherpad data is null");
        return;
    }

    logger.info("begin to update etherpad data");
    for (Object padId : (List<?>) padIdsObj) {
        if (padId == null) {
            continue;
        }
        try {
            // Fetch inside the try so one unreachable pad does not abort the whole run.
            Map<?, ?> resMap = client.getText(padId.toString());
            Object text = resMap == null ? null : resMap.get("text");
            if (text == null) {
                continue;
            }
            Map<String, Object> escape = Parse.parseEtherPad(text, padId.toString(), ETHERPAD_PATH);
            if (null != escape) {
                PublicClient.insert(escape, INDEX_PREFIX + "_" + escape.get("lang"));
            }
        } catch (Exception e) {
            logger.error("etherpad data imported error {} : {}", padId, e.getMessage());
        }
    }
    logger.info("etherpad data imported end");
}

public static void fileDate() throws Exception {
Expand Down
88 changes: 72 additions & 16 deletions openeuler/src/main/java/Parse.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.security.NoSuchAlgorithmException;
import java.security.NoSuchProviderException;
import java.security.SecureRandom;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
Expand All @@ -17,18 +28,6 @@
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Parse {
public static final String BASEPATH = System.getenv("TARGET") + "/";
public static final String BLOG = "blog";
Expand Down Expand Up @@ -679,8 +678,8 @@ private static String ReadInput(InputStream is) throws IOException {
return sbf.toString();
}

public static Map<String, String> getRandomIpHeader() {
Random random = new Random(System.currentTimeMillis());
public static Map<String, String> getRandomIpHeader() throws NoSuchAlgorithmException, NoSuchProviderException {
SecureRandom random = SecureRandom.getInstance("SHA1PRNG", "SUN");
String ip = (random.nextInt(255) + 1) + "." + (random.nextInt(255) + 1) + "." + (random.nextInt(255) + 1) + "."
+ (random.nextInt(255) + 1);
HashMap<String, String> header = new HashMap<>();
Expand All @@ -690,4 +689,61 @@ public static Map<String, String> getRandomIpHeader() {
header.put("REMOTE_ADDR", ip);
return header;
}

/**
 * Builds an index document from a sig YAML file.
 *
 * <p>The document contains:
 * <ul>
 *   <li>{@code title} - the YAML {@code name} value</li>
 *   <li>{@code lang} - the {@code lang} argument</li>
 *   <li>{@code type} - always {@code "sig"}</li>
 *   <li>{@code path} - {@code sigPath + lang + "/sig/" + name}</li>
 *   <li>{@code textContent} - {@code "maintainers: "} followed by one
 *       {@code "name,gitee_id;"} entry per maintainer, a newline, then
 *       {@code "committers: "} followed by one entry per committer of every
 *       repository</li>
 * </ul>
 *
 * @param paresFile the YAML file to read
 * @param lang      language code stored in the document and used in its path
 * @param sigPath   prefix prepended to the document path
 * @return the document, or {@code null} when the file cannot be read or its
 *         top-level YAML node is not a mapping (callers skip {@code null})
 * @throws Exception if the YAML parser rejects the file content
 */
public static Map<String, Object> parseSigYaml(File paresFile, String lang, String sigPath) throws Exception {
    Object loaded;
    try (InputStream inputStream = new FileInputStream(paresFile)) {
        loaded = new Yaml().load(inputStream);
    } catch (IOException e) {
        logger.error("sig yaml parse error: {}", e.getMessage());
        // Returning an empty map here would be indexed as a blank document.
        return null;
    }
    // An empty file loads as null; a scalar or list document is not a sig description.
    if (!(loaded instanceof Map)) {
        return null;
    }
    Map<?, ?> dataMap = (Map<?, ?>) loaded;
    Object sigName = dataMap.get("name");

    StringBuilder textContent = new StringBuilder("maintainers: ");
    appendSigMembers(textContent, dataMap.get("maintainers"));
    textContent.append("\n").append("committers: ");
    Object repositories = dataMap.get("repositories");
    if (repositories instanceof List) {
        for (Object repoObj : (List<?>) repositories) {
            if (repoObj instanceof Map) {
                appendSigMembers(textContent, ((Map<?, ?>) repoObj).get("committers"));
            }
        }
    }

    Map<String, Object> resMap = new HashMap<>();
    resMap.put("title", sigName);
    resMap.put("lang", lang);
    resMap.put("type", "sig");
    resMap.put("path", sigPath + lang + "/sig/" + sigName);
    resMap.put("textContent", textContent.toString());
    return resMap;
}

// Appends "name,gitee_id;" for every mapping in members; ignores anything that is not a list of mappings.
private static void appendSigMembers(StringBuilder out, Object members) {
    if (!(members instanceof List)) {
        return;
    }
    for (Object memberObj : (List<?>) members) {
        if (memberObj instanceof Map) {
            Map<?, ?> member = (Map<?, ?>) memberObj;
            out.append(sigField(member, "name")).append(",")
                    .append(sigField(member, "gitee_id")).append(";");
        }
    }
}

// Returns the value stored under key, or "" when the key is absent or mapped to null.
private static Object sigField(Map<?, ?> member, String key) {
    Object value = member.get(key);
    return value == null ? "" : value;
}

/**
 * Wraps the text of an Etherpad pad into an index document.
 *
 * <p>The document holds {@code textContent} (the string form of {@code text}),
 * {@code title} (the pad id), {@code path} ({@code "p/" + etherpadPath + padId}),
 * {@code lang} (always {@code "zh"}) and {@code type} (always {@code "pad"}).
 *
 * @param text         pad content; must not be {@code null}
 * @param padId        identifier of the pad
 * @param etherpadPath value placed between {@code "p/"} and the pad id in the path
 * @return a new mutable map with the five entries described above
 */
public static Map<String, Object> parseEtherPad(Object text, String padId, String etherpadPath) {
    final String content = text.toString();
    final String padPath = "p/" + etherpadPath + padId;

    Map<String, Object> document = new HashMap<>();
    document.put("textContent", content);
    document.put("title", padId);
    document.put("path", padPath);
    document.put("lang", "zh");
    document.put("type", "pad");
    return document;
}
}
25 changes: 25 additions & 0 deletions openeuler/src/main/java/etherpad/EPLiteClient.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package etherpad;

import java.util.HashMap;
import java.util.Map;

/**
 * Small client for an Etherpad Lite HTTP API, exposing only the calls this
 * project uses. Every request is delegated to an {@link EPLiteConnection}
 * created with API version {@code 1.2.13} and {@code UTF-8} encoding.
 */
public class EPLiteClient {
    private static final String DEFAULT_API_VERSION = "1.2.13";
    private static final String DEFAULT_ENCODING = "UTF-8";
    private final EPLiteConnection connection;

    /**
     * Creates a client bound to one Etherpad instance.
     *
     * @param url    URL handed to the underlying connection
     * @param apiKey API key handed to the underlying connection
     */
    public EPLiteClient(String url, String apiKey) {
        connection = new EPLiteConnection(url, apiKey, DEFAULT_API_VERSION, DEFAULT_ENCODING);
    }

    /**
     * Issues the {@code listAllPads} API call.
     *
     * @return the response map produced by the connection
     */
    public Map listAllPads() {
        return connection.get("listAllPads");
    }

    /**
     * Issues the {@code getText} API call for one pad.
     *
     * @param padId identifier sent as the {@code padID} argument
     * @return the response map produced by the connection
     */
    public Map getText(String padId) {
        Map<String, Object> params = new HashMap<>();
        params.put("padID", padId);
        return connection.get("getText", params);
    }

}
Loading

0 comments on commit 374bac4

Please sign in to comment.