Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mindspore增加论文案例视频搜索源 #8

Merged
merged 1 commit into from
Nov 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions mindspore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@
<version>1.0-SNAPSHOT</version>
</dependency>

<!-- Jackson databind: supplies the ObjectMapper / JsonNode classes that Parse.java imports to read the paper, case and course JSON files. -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.17.3</version>
</dependency>

</dependencies>

<build>
Expand Down
68 changes: 68 additions & 0 deletions mindspore/src/main/java/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
public class App {
/** Base path read from the TARGET environment variable. */
private static final String TARGET = System.getenv("TARGET");

/** Sub-folders of TARGET that portalData() scans for paper, case and course JSON files. */
private static final String TARGET_PAPERS = TARGET + "/papers";
private static final String TARGET_CASES = TARGET + "/cases";
private static final String TARGET_COURSES = TARGET + "/courses";

/** Value of the APPLICATION_PATH environment variable; handed to PublicClient.CreateClientFormConfig. */
private static final String APPLICATION_PATH = System.getenv("APPLICATION_PATH");

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lgtm

// Value of the MAPPING_PATH environment variable; passed to PublicClient.makeIndex together with the index name.
private static final String MAPPING_PATH = System.getenv("MAPPING_PATH");
Expand All @@ -18,6 +24,7 @@ public static void main(String[] args) {
PublicClient.CreateClientFormConfig(APPLICATION_PATH);
PublicClient.makeIndex(INDEX_PREFIX + "_zh", MAPPING_PATH);
PublicClient.makeIndex(INDEX_PREFIX + "_en", MAPPING_PATH);
portalData();
fileDate();
} catch (Exception e) {
System.out.println(e.getMessage());
Expand All @@ -28,6 +35,67 @@ public static void main(String[] args) {
System.exit(0);
}

/**
 * Imports the portal paper, case and course data into the search indices.
 *
 * <p>Every category is read from its own folder (TARGET_PAPERS, TARGET_CASES,
 * TARGET_COURSES). A category whose folder is missing is reported and skipped, so one
 * missing folder does not prevent the remaining categories from being imported. A failure
 * while importing one category is printed and does not stop the others.
 *
 * @throws Exception kept in the signature for callers; category failures are handled here
 */
public static void portalData() throws Exception {
    // Map.of does not guarantee an iteration order; the categories are independent of each other.
    Map<String, File> categoryFolders = Map.of(
            "paper", new File(TARGET_PAPERS),
            "case", new File(TARGET_CASES),
            "course", new File(TARGET_COURSES));

    System.out.println("begin to update portal data,开始更新");
    for (Map.Entry<String, File> entry : categoryFolders.entrySet()) {
        String category = entry.getKey();
        File folder = entry.getValue();

        // FileUtils.listFiles (used by updatePortal) needs an existing directory.
        if (!folder.isDirectory()) {
            System.out.printf("%s folder does not exist%n", folder.getPath());
            continue;
        }

        try {
            updatePortal(category, folder);
            System.out.println(category + " data import success");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * Parses every *.json file below {@code indexFile} (recursively) and inserts the resulting
 * documents into the index named {@code INDEX_PREFIX + "_" + lang}.
 *
 * <p>A failure in one file is printed and the remaining files are still processed.
 *
 * @param category  "paper", "case" or "course"; selects the parser. Any other value yields
 *                  no documents, which is reported per file as "parse null".
 * @param indexFile folder that is searched for JSON files
 * @throws Exception if listing the folder fails
 */
public static void updatePortal(String category, File indexFile) throws Exception {
    Collection<File> jsonFiles = FileUtils.listFiles(indexFile, new String[]{"json"}, true);

    for (File jsonFile : jsonFiles) {
        try {
            List<Map<String, Object>> documents;
            switch (category) {
                case "paper":
                    documents = Parse.parsePaperJson(jsonFile);
                    break;
                case "case":
                    documents = Parse.parseCaseJson(jsonFile);
                    break;
                case "course":
                    documents = Parse.parseCourseJson(jsonFile);
                    break;
                default:
                    documents = null;
                    break;
            }

            if (documents == null || documents.isEmpty()) {
                System.out.println("parse null : " + jsonFile.getPath());
                continue;
            }

            for (Map<String, Object> document : documents) {
                Object lang = document.get("lang");
                // Without a language the target index name would end in "_null".
                if (lang == null) {
                    System.out.println("skip record without lang : " + jsonFile.getPath());
                    continue;
                }
                PublicClient.insert(document, INDEX_PREFIX + "_" + lang);
            }
        } catch (Exception e) {
            System.out.println(jsonFile.getPath());
            // Print the exception itself: getMessage() alone is null for many exception types.
            System.out.println(e);
        }
    }
}

public static void fileDate() throws Exception {
File indexFile = new File(TARGET);
if (!indexFile.exists()) {
Expand Down
159 changes: 159 additions & 0 deletions mindspore/src/main/java/Parse.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.commons.io.FileUtils;
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
Expand Down Expand Up @@ -114,6 +117,162 @@ public static Map<String, Object> parse(File file) throws Exception {
}
return jsonMap;
}
/**
 * Reads a paper JSON file and turns every element of its top-level array into one index
 * document.
 *
 * <p>A field that is absent from an element is stored as {@code null} instead of raising a
 * NullPointerException, so a single incomplete entry no longer discards the whole file.
 * Every document gets {@code lang = "zh"} and {@code type = "paper"}.
 *
 * @param jsonFile file to read
 * @return the documents; empty when the root is not an array or the file cannot be read
 */
public static List<Map<String, Object>> parsePaperJson(File jsonFile) {
    // { JSON field name, key used in the index document }
    String[][] fieldMapping = {
            {"title", "title"},
            {"desc", "textContent"},
            {"domainName", "domainName"},
            {"publishedBy", "publishedBy"},
            {"sourcesName", "sourcesName"},
            {"postedOn", "postedOn"},
            {"href", "path"},
            {"codeLink", "codeLink"},
    };

    ObjectMapper objectMapper = new ObjectMapper();
    List<Map<String, Object>> ansList = new ArrayList<>();
    try {
        JsonNode rootNode = objectMapper.readTree(jsonFile);

        if (rootNode != null && rootNode.isArray()) {
            for (JsonNode paperNode : rootNode) {
                Map<String, Object> jsonMap = new HashMap<>();
                for (String[] mapping : fieldMapping) {
                    // get() returns null when the field is missing.
                    JsonNode value = paperNode.get(mapping[0]);
                    jsonMap.put(mapping[1], value != null ? value.asText() : null);
                }
                jsonMap.put("lang", "zh");
                jsonMap.put("type", "paper");
                ansList.add(jsonMap);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return ansList;
}

/**
 * Reads a case JSON file and turns every element of its top-level array into one index
 * document.
 *
 * <p>A text field that is absent from an element is stored as {@code null} instead of
 * raising a NullPointerException. {@code technologies} is always stored as a list: the
 * array elements, the single value when the field is not an array, or an empty list when
 * the field is absent. Every document gets {@code lang = "zh"} and {@code type = "case"}.
 *
 * @param jsonFile file to read
 * @return the documents; empty when the root is not an array or the file cannot be read
 */
public static List<Map<String, Object>> parseCaseJson(File jsonFile) {
    // { JSON field name, key used in the index document }
    String[][] fieldMapping = {
            {"title", "title"},
            {"desc", "textContent"},
            {"category", "category"},
            {"type", "caseType"},
            {"logoImg", "logoImg"},
            {"logoImgDark", "logoImgDark"},
            {"href", "path"},
    };

    ObjectMapper objectMapper = new ObjectMapper();
    List<Map<String, Object>> ansList = new ArrayList<>();
    try {
        JsonNode rootNode = objectMapper.readTree(jsonFile);

        if (rootNode != null && rootNode.isArray()) {
            for (JsonNode caseNode : rootNode) {
                Map<String, Object> jsonMap = new HashMap<>();
                for (String[] mapping : fieldMapping) {
                    // get() returns null when the field is missing.
                    JsonNode value = caseNode.get(mapping[0]);
                    jsonMap.put(mapping[1], value != null ? value.asText() : null);
                }

                JsonNode technologiesNode = caseNode.get("technologies");
                List<String> technologies = new ArrayList<>();
                if (technologiesNode != null && technologiesNode.isArray()) {
                    for (JsonNode techNode : technologiesNode) {
                        technologies.add(techNode.asText());
                    }
                } else if (technologiesNode != null) {
                    technologies.add(technologiesNode.asText());
                }
                jsonMap.put("technologies", technologies);

                jsonMap.put("lang", "zh");
                jsonMap.put("type", "case");
                ansList.add(jsonMap);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return ansList;
}

/**
 * Reads a course JSON file and flattens its three levels (course, {@code children},
 * {@code courseList}) into one index document per {@code courseList} entry.
 *
 * <p>Each document carries the text fields of its course and of its child entry in addition
 * to its own fields, plus {@code type = "course"}. Absent fields are stored as {@code null}.
 *
 * @param jsonFile file to read
 * @return the documents; empty when the root is not an array or the file cannot be read
 */
public static List<Map<String, Object>> parseCourseJson(File jsonFile) {
    // { JSON field name, key used in the index document } for each nesting level
    String[][] courseFields = {
            {"id", "courseId"},
            {"catalog", "courseCatalog"},
            {"description", "courseDescription"},
            {"series", "courseSeries"},
            {"classes", "courseClasses"},
            {"cover", "courseCover"},
            {"catalogName", "courseCatalogName"},
            {"catalogDesc", "courseCatalogDesc"},
    };
    String[][] childFields = {
            {"id", "childrenId"},
            {"name", "textContent"},
            {"count", "childrenCount"},
            {"coverImg", "childrenCoverImg"},
    };
    String[][] lessonFields = {
            {"id", "courseListId"},
            {"categoryId", "courseListCategoryId"},
            {"title", "title"},
            {"lable", "courseListLable"},
            {"lang", "lang"},
            {"form", "courseListForm"},
            {"videoType", "courseListVideoType"},
            {"videoUrl", "path"},
            {"playMin", "courseListPlayMin"},
    };

    List<Map<String, Object>> ansList = new ArrayList<>();
    ObjectMapper mapper = new ObjectMapper();
    try {
        JsonNode root = mapper.readTree(jsonFile);
        if (!root.isArray()) {
            return ansList;
        }
        for (JsonNode course : root) {
            Map<String, Object> courseValues = new HashMap<>();
            copyCourseText(course, courseFields, courseValues);

            JsonNode children = course.get("children");
            if (children == null || !children.isArray()) {
                continue;
            }
            for (JsonNode child : children) {
                // Start from the course-level values and add the child-level ones.
                Map<String, Object> childValues = new HashMap<>(courseValues);
                copyCourseText(child, childFields, childValues);

                JsonNode lessons = child.get("courseList");
                if (lessons == null || !lessons.isArray()) {
                    continue;
                }
                for (JsonNode lesson : lessons) {
                    Map<String, Object> jsonMap = new HashMap<>(childValues);
                    copyCourseText(lesson, lessonFields, jsonMap);
                    jsonMap.put("type", "course");
                    ansList.add(jsonMap);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return ansList;
}

/** Copies the listed fields of {@code source} into {@code target} as text, or null when a field is absent. */
private static void copyCourseText(JsonNode source, String[][] mapping, Map<String, Object> target) {
    for (String[] pair : mapping) {
        JsonNode value = source.get(pair[0]);
        target.put(pair[1], value != null ? value.asText() : null);
    }
}

public static Boolean parseHtml(Map<String, Object> jsonMap, String fileContent) {
String title = "";
Expand Down
30 changes: 28 additions & 2 deletions mindspore/src/main/resources/initDoc.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
#!/bin/bash
# Working folders: repositories are cloned below SOURCE; the portal data is copied
# into the papers/cases/courses sub-folders of TARGET.
SOURCE=/docs-file/source
TARGET=/docs-file/target
TARGET_PAPERS="${TARGET}/papers"
TARGET_CASES="${TARGET}/cases"
TARGET_COURSES="${TARGET}/courses"

# Create all folders up front; -p creates missing parents and accepts existing folders.
mkdir -p "${SOURCE}" "${TARGET}" "${TARGET_PAPERS}" "${TARGET_CASES}" "${TARGET_COURSES}"

# Clone website-docs once, as a shallow clone. Cloning the same repository twice into
# the same folder makes the second clone fail because the destination already exists.
# shellcheck disable=SC2164
cd "${SOURCE}"

git clone --depth 1 https://gitee.com/mindspore/website-docs.git

if [ ! -d "${SOURCE}/website-docs" ]; then
rm -rf ${TARGET}
Expand Down Expand Up @@ -65,4 +71,24 @@ find ./ -name genindex.html |xargs rm -rf
# Remove every py-modindex.html and unabridged_api.html below the current folder.
# -exec passes the paths to rm directly, so names containing whitespace are handled
# (the problem SC2038 warns about for find | xargs).
find ./ -name py-modindex.html -exec rm -rf {} +
find ./ -name unabridged_api.html -exec rm -rf {} +

# Download the papers, cases and courses data from mindspore-portal.
# shellcheck disable=SC2164
cd "${SOURCE}"

# The clone is authenticated; gitee_branch, gitee_user and gitee_pass are not set in
# this part of the script — presumably they are provided by the environment.
# NOTE(review): credentials embedded in the clone URL are written to the clone's
# .git/config and are visible in the process list; consider a credential helper.
git clone --depth 1 -b "${gitee_branch}" "https://${gitee_user}:${gitee_pass}@gitee.com/mindspore/mindspore-portal.git"

# If the clone did not produce the folder, remove TARGET and stop.
if [ ! -d "${SOURCE}/mindspore-portal" ]; then
  rm -rf "${TARGET}"
  exit
fi


# shellcheck disable=SC2164
cd "${SOURCE}/mindspore-portal"

# The globs stay outside the quotes so that they still expand.
cp -r "${SOURCE}/mindspore-portal/packages/website/data/papers/"* "${TARGET_PAPERS}/"
cp -r "${SOURCE}/mindspore-portal/packages/website/data/cases/"* "${TARGET_CASES}/"
cp -r "${SOURCE}/mindspore-portal/packages/website/data/courses/"* "${TARGET_COURSES}/"
Loading