Skip to content

Commit

Permalink
Merge pull request #8 from opensourceways/xcz
Browse files Browse the repository at this point in the history
mindspore增加论文案例视频搜索源
  • Loading branch information
zhongjun2 authored Nov 30, 2024
2 parents 01e42a5 + efd9900 commit fac764a
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 2 deletions.
6 changes: 6 additions & 0 deletions mindspore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@
<version>1.0-SNAPSHOT</version>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.17.3</version>
</dependency>

</dependencies>

<build>
Expand Down
68 changes: 68 additions & 0 deletions mindspore/src/main/java/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
public class App {
private static final String TARGET = System.getenv("TARGET");

private static final String TARGET_PAPERS = System.getenv("TARGET") + "/papers";

private static final String TARGET_CASES = System.getenv("TARGET") + "/cases";

private static final String TARGET_COURSES = System.getenv("TARGET") + "/courses";

private static final String APPLICATION_PATH = System.getenv("APPLICATION_PATH");

private static final String MAPPING_PATH = System.getenv("MAPPING_PATH");
Expand All @@ -18,6 +24,7 @@ public static void main(String[] args) {
PublicClient.CreateClientFormConfig(APPLICATION_PATH);
PublicClient.makeIndex(INDEX_PREFIX + "_zh", MAPPING_PATH);
PublicClient.makeIndex(INDEX_PREFIX + "_en", MAPPING_PATH);
portalData();
fileDate();
} catch (Exception e) {
System.out.println(e.getMessage());
Expand All @@ -28,6 +35,67 @@ public static void main(String[] args) {
System.exit(0);
}

/**
 * Imports the portal data sets (papers, cases, courses) through {@code updatePortal}.
 *
 * <p>Each category is read from its own folder ({@code TARGET_PAPERS}, {@code TARGET_CASES},
 * {@code TARGET_COURSES}). A category whose folder does not exist is reported and skipped;
 * the remaining categories are still imported. An exception raised while importing one
 * category is printed and does not stop the others.
 *
 * @throws Exception declared for compatibility with existing callers
 */
public static void portalData() throws Exception {
    // Parallel arrays give a fixed paper -> case -> course import order.
    String[] categories = {"paper", "case", "course"};
    File[] folders = {new File(TARGET_PAPERS), new File(TARGET_CASES), new File(TARGET_COURSES)};

    System.out.println("begin to update portal data,开始更新");
    for (int i = 0; i < categories.length; i++) {
        String category = categories[i];
        File folder = folders[i];
        if (!folder.exists()) {
            // Skip only this category instead of handing a missing folder to updatePortal.
            System.out.printf("%s folder does not exist%n", folder.getPath());
            continue;
        }
        try {
            updatePortal(category, folder);
            System.out.println(category + " data import success");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * Parses every {@code .json} file found (recursively) under {@code indexFile} with the parser
 * matching {@code category} and inserts each resulting document into the index
 * {@code INDEX_PREFIX + "_" + lang}, where {@code lang} is the document's "lang" entry.
 *
 * <p>Files that yield no documents are reported with a "parse null" line. A document without
 * a "lang" value is reported and skipped, because it cannot be routed to a language index.
 * An exception while handling one file is printed and processing continues with the next file.
 *
 * @param category  one of "paper", "case" or "course"; any other value yields no documents
 * @param indexFile folder to scan for JSON files
 * @throws Exception if listing the folder fails
 */
public static void updatePortal(String category, File indexFile) throws Exception {
    Collection<File> jsonFiles = FileUtils.listFiles(indexFile, new String[]{"json"}, true);

    for (File jsonFile : jsonFiles) {
        try {
            List<Map<String, Object>> documents;
            switch (category) {
                case "paper":
                    documents = Parse.parsePaperJson(jsonFile);
                    break;
                case "case":
                    documents = Parse.parseCaseJson(jsonFile);
                    break;
                case "course":
                    documents = Parse.parseCourseJson(jsonFile);
                    break;
                default:
                    documents = null;
                    break;
            }
            if (documents == null || documents.isEmpty()) {
                System.out.println("parse null : " + jsonFile.getPath());
                continue;
            }
            for (Map<String, Object> document : documents) {
                Object lang = document.get("lang");
                if (lang == null || lang.toString().isEmpty()) {
                    // Without a language the target would be "<prefix>_null" / "<prefix>_".
                    System.out.println("missing lang, skipped : " + jsonFile.getPath());
                    continue;
                }
                PublicClient.insert(document, INDEX_PREFIX + "_" + lang);
            }
        } catch (Exception e) {
            System.out.println(jsonFile.getPath());
            System.out.println(e.getMessage());
        }
    }
}

public static void fileDate() throws Exception {
File indexFile = new File(TARGET);
if (!indexFile.exists()) {
Expand Down
159 changes: 159 additions & 0 deletions mindspore/src/main/java/Parse.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.commons.io.FileUtils;
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
Expand Down Expand Up @@ -114,6 +117,162 @@ public static Map<String, Object> parse(File file) throws Exception {
}
return jsonMap;
}
/**
 * Reads a papers JSON file and converts each element of its top-level array into an
 * index document.
 *
 * <p>Every document gets {@code "lang" = "zh"} and {@code "type" = "paper"}. A field that is
 * absent from an element is stored as {@code null} instead of aborting the whole file.
 * If the root is not an array, or the file cannot be read, an empty list is returned
 * (the I/O error is printed).
 *
 * @param jsonFile the JSON file to parse
 * @return the parsed documents; never {@code null}
 */
public static List<Map<String, Object>> parsePaperJson(File jsonFile) {
    // Each pair is {field name in the JSON element, key in the index document}.
    final String[][] fieldMapping = {
        {"title", "title"},
        {"desc", "textContent"},
        {"domainName", "domainName"},
        {"publishedBy", "publishedBy"},
        {"sourcesName", "sourcesName"},
        {"postedOn", "postedOn"},
        {"href", "path"},
        {"codeLink", "codeLink"},
    };
    ObjectMapper objectMapper = new ObjectMapper();
    List<Map<String, Object>> ansList = new ArrayList<>();
    try {
        JsonNode rootNode = objectMapper.readTree(jsonFile);
        if (rootNode.isArray()) {
            for (JsonNode paperNode : rootNode) {
                Map<String, Object> jsonMap = new HashMap<>();
                for (String[] mapping : fieldMapping) {
                    // get() returns null for an absent field; guard before asText().
                    JsonNode value = paperNode.get(mapping[0]);
                    jsonMap.put(mapping[1], value != null ? value.asText() : null);
                }
                jsonMap.put("lang", "zh");
                jsonMap.put("type", "paper");
                ansList.add(jsonMap);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return ansList;
}

/**
 * Reads a cases JSON file and converts each element of its top-level array into an
 * index document.
 *
 * <p>Every document gets {@code "lang" = "zh"} and {@code "type" = "case"}; the element's own
 * "type" field is stored under {@code "caseType"}. A scalar field that is absent from an
 * element is stored as {@code null} instead of aborting the whole file. "technologies" is
 * always stored as a list: the array's values, a single value wrapped in a list, or an empty
 * list when the field is absent. If the root is not an array, or the file cannot be read,
 * an empty list is returned (the I/O error is printed).
 *
 * @param jsonFile the JSON file to parse
 * @return the parsed documents; never {@code null}
 */
public static List<Map<String, Object>> parseCaseJson(File jsonFile) {
    // Each pair is {field name in the JSON element, key in the index document}.
    final String[][] fieldMapping = {
        {"title", "title"},
        {"desc", "textContent"},
        {"href", "path"},
        {"category", "category"},
        {"logoImg", "logoImg"},
        {"type", "caseType"},
        {"logoImgDark", "logoImgDark"},
    };
    ObjectMapper objectMapper = new ObjectMapper();
    List<Map<String, Object>> ansList = new ArrayList<>();
    try {
        JsonNode rootNode = objectMapper.readTree(jsonFile);
        if (rootNode.isArray()) {
            for (JsonNode caseNode : rootNode) {
                Map<String, Object> jsonMap = new HashMap<>();
                for (String[] mapping : fieldMapping) {
                    // get() returns null for an absent field; guard before asText().
                    JsonNode value = caseNode.get(mapping[0]);
                    jsonMap.put(mapping[1], value != null ? value.asText() : null);
                }

                JsonNode technologiesNode = caseNode.get("technologies");
                List<String> technologies = new ArrayList<>();
                if (technologiesNode != null && technologiesNode.isArray()) {
                    for (JsonNode techNode : technologiesNode) {
                        technologies.add(techNode.asText());
                    }
                } else if (technologiesNode != null) {
                    technologies.add(technologiesNode.asText());
                }
                jsonMap.put("technologies", technologies);

                jsonMap.put("lang", "zh");
                jsonMap.put("type", "case");
                ansList.add(jsonMap);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return ansList;
}

/**
 * Reads a courses JSON file and flattens it into one index document per entry of every
 * "courseList" array.
 *
 * <p>The expected nesting is: top-level array of courses, each with a "children" array,
 * each child with a "courseList" array. Levels that are absent or not arrays are skipped.
 * Each document carries the entry's own fields plus the fields of its parent child and
 * course; absent fields are stored as {@code null}. If the root is not an array, or the
 * file cannot be read, an empty list is returned (the I/O error is printed).
 *
 * @param jsonFile the JSON file to parse
 * @return the flattened documents; never {@code null}
 */
public static List<Map<String, Object>> parseCourseJson(File jsonFile) {
    ObjectMapper objectMapper = new ObjectMapper();
    List<Map<String, Object>> ansList = new ArrayList<>();
    try {
        JsonNode rootNode = objectMapper.readTree(jsonFile);
        if (!rootNode.isArray()) {
            return ansList;
        }
        for (JsonNode course : rootNode) {
            JsonNode groups = course.get("children");
            if (groups == null || !groups.isArray()) {
                continue;
            }
            for (JsonNode group : groups) {
                JsonNode lessons = group.get("courseList");
                if (lessons == null || !lessons.isArray()) {
                    continue;
                }
                for (JsonNode lesson : lessons) {
                    Map<String, Object> doc = new HashMap<>();
                    doc.put("type", "course");
                    // Fields of the courseList entry itself.
                    doc.put("courseListPlayMin", courseFieldText(lesson, "playMin"));
                    doc.put("path", courseFieldText(lesson, "videoUrl"));
                    doc.put("courseListVideoType", courseFieldText(lesson, "videoType"));
                    doc.put("courseListForm", courseFieldText(lesson, "form"));
                    doc.put("lang", courseFieldText(lesson, "lang"));
                    // The source key is spelled "lable".
                    doc.put("courseListLable", courseFieldText(lesson, "lable"));
                    doc.put("title", courseFieldText(lesson, "title"));
                    doc.put("courseListCategoryId", courseFieldText(lesson, "categoryId"));
                    doc.put("courseListId", courseFieldText(lesson, "id"));
                    // Fields inherited from the enclosing "children" element.
                    doc.put("childrenCoverImg", courseFieldText(group, "coverImg"));
                    doc.put("childrenCount", courseFieldText(group, "count"));
                    doc.put("textContent", courseFieldText(group, "name"));
                    doc.put("childrenId", courseFieldText(group, "id"));
                    // Fields inherited from the enclosing top-level course element.
                    doc.put("courseCatalogDesc", courseFieldText(course, "catalogDesc"));
                    doc.put("courseCatalogName", courseFieldText(course, "catalogName"));
                    doc.put("courseCover", courseFieldText(course, "cover"));
                    doc.put("courseClasses", courseFieldText(course, "classes"));
                    doc.put("courseSeries", courseFieldText(course, "series"));
                    doc.put("courseDescription", courseFieldText(course, "description"));
                    doc.put("courseCatalog", courseFieldText(course, "catalog"));
                    doc.put("courseId", courseFieldText(course, "id"));
                    ansList.add(doc);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return ansList;
}

/** Returns the text of {@code node}'s field {@code name}, or {@code null} when the field is absent. */
private static String courseFieldText(JsonNode node, String name) {
    JsonNode value = node.get(name);
    return (value != null) ? value.asText() : null;
}

public static Boolean parseHtml(Map<String, Object> jsonMap, String fileContent) {
String title = "";
Expand Down
30 changes: 28 additions & 2 deletions mindspore/src/main/resources/initDoc.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
#!/bin/bash
# Working directories used by this script.
# SOURCE is the directory the script changes into before cloning repositories.
SOURCE=/docs-file/source
# TARGET and its papers/cases/courses sub-folders receive the prepared data.
# NOTE(review): presumably these match the TARGET environment variable read by the
# Java importer (TARGET + "/papers", "/cases", "/courses") — confirm in deployment config.
TARGET=/docs-file/target
TARGET_PAPERS=/docs-file/target/papers
TARGET_CASES=/docs-file/target/cases
TARGET_COURSES=/docs-file/target/courses
# Create every directory up front; -p makes this a no-op when they already exist.
mkdir -p ${SOURCE}
mkdir -p ${TARGET}
mkdir -p ${TARGET_PAPERS}
mkdir -p ${TARGET_CASES}
mkdir -p ${TARGET_COURSES}

# shellcheck disable=SC2164
cd ${SOURCE}

git clone https://gitee.com/mindspore/website-docs.git
git clone --depth 1 https://gitee.com/mindspore/website-docs.git

if [ ! -d "${SOURCE}/website-docs" ]; then
rm -rf ${TARGET}
Expand Down Expand Up @@ -65,4 +71,24 @@ find ./ -name genindex.html |xargs rm -rf
# shellcheck disable=SC2038
find ./ -name py-modindex.html |xargs rm -rf
# shellcheck disable=SC2038
find ./ -name unabridged_api.html |xargs rm -rf
find ./ -name unabridged_api.html |xargs rm -rf

# Download papers (and the cases/courses data copied below) from mindspore-portal
# shellcheck disable=SC2164
cd ${SOURCE}

# Shallow-clone the requested branch using credentials taken from the
# gitee_branch / gitee_user / gitee_pass variables.
# NOTE(review): these variables are not set in the visible part of this script —
# presumably they come from the environment; confirm. The password is embedded in the
# clone URL, so verify it cannot leak through logs or the cloned repo's remote config.
# git clone --depth 1 https://gitee.com/mindspore/mindspore-portal.git
git clone --depth 1 -b ${gitee_branch} https://${gitee_user}:${gitee_pass}@gitee.com/mindspore/mindspore-portal.git

# If the clone did not produce the repository directory, remove the whole target
# directory and stop.
# NOTE(review): `exit` is given no explicit status here — confirm whether callers
# need a non-zero exit code to detect this failure.
if [ ! -d "${SOURCE}/mindspore-portal" ]; then
rm -rf ${TARGET}
exit
fi


# shellcheck disable=SC2164
cd ${SOURCE}/mindspore-portal

# Copy the portal's papers, cases and courses data into their target folders.
cp -r ${SOURCE}/mindspore-portal/packages/website/data/papers/* ${TARGET_PAPERS}/
cp -r ${SOURCE}/mindspore-portal/packages/website/data/cases/* ${TARGET_CASES}/
cp -r ${SOURCE}/mindspore-portal/packages/website/data/courses/* ${TARGET_COURSES}/

0 comments on commit fac764a

Please sign in to comment.