From 464a6d518515ed0ba0b6f670cabf9e188e3354ec Mon Sep 17 00:00:00 2001 From: XieChengzhi Date: Thu, 14 Nov 2024 19:34:11 +0800 Subject: [PATCH] =?UTF-8?q?mindspore=E5=A2=9E=E5=8A=A0=E8=AE=BA=E6=96=87?= =?UTF-8?q?=E6=A1=88=E4=BE=8B=E8=A7=86=E9=A2=91=E6=90=9C=E7=B4=A2=E6=BA=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindspore/pom.xml | 6 + mindspore/src/main/java/App.java | 68 ++++++++++ mindspore/src/main/java/Parse.java | 159 ++++++++++++++++++++++++ mindspore/src/main/resources/initDoc.sh | 30 ++++- 4 files changed, 261 insertions(+), 2 deletions(-) diff --git a/mindspore/pom.xml b/mindspore/pom.xml index 4d46235..4af52e1 100644 --- a/mindspore/pom.xml +++ b/mindspore/pom.xml @@ -80,6 +80,12 @@ 1.0-SNAPSHOT + + com.fasterxml.jackson.core + jackson-databind + 2.17.3 + + diff --git a/mindspore/src/main/java/App.java b/mindspore/src/main/java/App.java index 63af5f4..9d2c462 100644 --- a/mindspore/src/main/java/App.java +++ b/mindspore/src/main/java/App.java @@ -6,6 +6,12 @@ public class App { private static final String TARGET = System.getenv("TARGET"); + private static final String TARGET_PAPERS = System.getenv("TARGET") + "/papers"; + + private static final String TARGET_CASES = System.getenv("TARGET") + "/cases"; + + private static final String TARGET_COURSES = System.getenv("TARGET") + "/courses"; + private static final String APPLICATION_PATH = System.getenv("APPLICATION_PATH"); private static final String MAPPING_PATH = System.getenv("MAPPING_PATH"); @@ -18,6 +24,7 @@ public static void main(String[] args) { PublicClient.CreateClientFormConfig(APPLICATION_PATH); PublicClient.makeIndex(INDEX_PREFIX + "_zh", MAPPING_PATH); PublicClient.makeIndex(INDEX_PREFIX + "_en", MAPPING_PATH); + portalData(); fileDate(); } catch (Exception e) { System.out.println(e.getMessage()); @@ -28,6 +35,67 @@ public static void main(String[] args) { System.exit(0); } + public static void portalData() throws Exception { + File indexPaperFile = new File(TARGET_PAPERS); + File indexCaseFile = new File(TARGET_CASES); + File indexCourseFile = new File(TARGET_COURSES); + if (!indexPaperFile.exists()) { + System.out.printf("%s folder does not exist%n", indexPaperFile.getPath()); + return; + } + if (!indexCaseFile.exists()) { + System.out.printf("%s folder does not exist%n", indexCaseFile.getPath()); + return; + } + if (!indexCourseFile.exists()) { + System.out.printf("%s folder does not exist%n", indexCourseFile.getPath()); + } + + System.out.println("begin to update portal data,开始更新"); + Map hashMap = Map.of("paper", indexPaperFile, "case", indexCaseFile, "course", indexCourseFile); + hashMap.entrySet().forEach(entry -> { + try { + updatePortal(entry.getKey(), entry.getValue()); + System.out.println(entry.getKey() + " data import success"); + } catch (Exception e) { + e.printStackTrace(); + } + }); + } + + public static void updatePortal(String category, File indexFile) throws Exception { + Collection listFiles = FileUtils.listFiles(indexFile, new String[]{"json"}, true); + + for(File paresFile : listFiles) { + try { + List> escapeList = null; + switch (category) { + case "paper": + escapeList = Parse.parsePaperJson(paresFile); + break; + case "case": + escapeList = Parse.parseCaseJson(paresFile); + break; + case "course": + escapeList = Parse.parseCourseJson(paresFile); + break; + default: + break; + } + if (escapeList != null && !escapeList.isEmpty() ) { + for (Map escape : escapeList) { + PublicClient.insert(escape, INDEX_PREFIX + "_" + escape.get("lang")); + } + } else { + System.out.println("parse null : " + paresFile.getPath()); + } + } catch (Exception e) { + System.out.println(paresFile.getPath()); + System.out.println(e.getMessage()); + } + } + } + public static void fileDate() throws Exception { File indexFile = new File(TARGET); if (!indexFile.exists()) { diff --git a/mindspore/src/main/java/Parse.java b/mindspore/src/main/java/Parse.java index dcd61e7..e48dce0 100644 --- a/mindspore/src/main/java/Parse.java +++ b/mindspore/src/main/java/Parse.java @@ -1,6 +1,9 @@ import com.alibaba.fastjson2.JSON; import com.alibaba.fastjson2.JSONArray; import com.alibaba.fastjson2.JSONObject; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + import org.apache.commons.io.FileUtils; import org.commonmark.node.Node; import org.commonmark.parser.Parser; @@ -114,6 +117,162 @@ public static Map parse(File file) throws Exception { } return jsonMap; } + public static List> parsePaperJson(File jsonFile) { + ObjectMapper objectMapper = new ObjectMapper(); + List> ansList = new ArrayList<>(); + try { + JsonNode rootNode = objectMapper.readTree(jsonFile); + + if(rootNode.isArray()) { + for(JsonNode paperNode : rootNode) { + Map jsonMap = new HashMap<>(); + String title = paperNode.get("title").asText(); + String desc = paperNode.get("desc").asText(); + String domainName = paperNode.get("domainName").asText(); + String publishedBy = paperNode.get("publishedBy").asText(); + String sourcesName = paperNode.get("sourcesName").asText(); + String postedOn = paperNode.get("postedOn").asText(); + String href = paperNode.get("href").asText(); + String codeLink = paperNode.get("codeLink").asText(); + jsonMap.put("title", title); + jsonMap.put("textContent", desc); + jsonMap.put("domainName", domainName); + jsonMap.put("publishedBy", publishedBy); + jsonMap.put("sourcesName", sourcesName); + jsonMap.put("postedOn", postedOn); + jsonMap.put("path", href); + jsonMap.put("codeLink", codeLink); + jsonMap.put("lang", "zh"); + jsonMap.put("type", "paper"); + ansList.add(jsonMap); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + return ansList; + } + + public static List> parseCaseJson(File jsonFile) { + ObjectMapper objectMapper = new ObjectMapper(); + List> ansList = new ArrayList<>(); + try { + JsonNode rootNode = objectMapper.readTree(jsonFile); + + if(rootNode.isArray()) { + for(JsonNode caseNode : rootNode) { + Map jsonMap = new HashMap<>(); + String title = caseNode.get("title").asText(); + String desc = caseNode.get("desc").asText(); + String category = caseNode.get("category").asText(); + String caseType = caseNode.get("type").asText(); + String logoImg = caseNode.get("logoImg").asText(); + String href = caseNode.get("href").asText(); + + String logoImgDark = null; + if (caseNode.get("logoImgDark") != null) { + logoImgDark = caseNode.get("logoImgDark").asText(); + } + + JsonNode technologiesNode = caseNode.get("technologies"); + List technologies = new ArrayList<>(); + if (technologiesNode != null && technologiesNode.isArray()) { + for (JsonNode techNode : technologiesNode) { + + technologies.add(techNode.asText()); + } + } else if (technologiesNode != null) { + technologies.add(technologiesNode.asText()); + } + + jsonMap.put("title", title); + jsonMap.put("textContent", desc); + jsonMap.put("lang", "zh"); + jsonMap.put("type", "case"); + jsonMap.put("path", href); + jsonMap.put("category", category); + jsonMap.put("technologies", technologies); + jsonMap.put("logoImg", logoImg); + jsonMap.put("caseType", caseType); + jsonMap.put("logoImgDark", logoImgDark); + ansList.add(jsonMap); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + return ansList; + } + + public static List> parseCourseJson(File jsonFile) { + ObjectMapper objectMapper = new ObjectMapper(); + List> ansList = new ArrayList<>(); + try { + JsonNode rootNode = objectMapper.readTree(jsonFile); + if(rootNode.isArray()) { + for (JsonNode courseNode : rootNode) { + String courseId = (courseNode.get("id") != null) ? courseNode.get("id").asText() : null; + String courseCatalog = (courseNode.get("catalog") != null) ? courseNode.get("catalog").asText() : null; + String courseDescription = (courseNode.get("description") != null) ? courseNode.get("description").asText() : null; + String courseSeries = (courseNode.get("series") != null) ? courseNode.get("series").asText() : null; + String courseClasses = (courseNode.get("classes") != null) ? courseNode.get("classes").asText() : null; + String courseCover = (courseNode.get("cover") != null) ? courseNode.get("cover").asText() : null; + String courseCatalogName = (courseNode.get("catalogName") != null) ? courseNode.get("catalogName").asText() : null; + String courseCatalogDesc = (courseNode.get("catalogDesc") != null) ? courseNode.get("catalogDesc").asText() : null; + JsonNode childrenNodes = courseNode.get("children"); + if (childrenNodes != null && childrenNodes.isArray()) { + for (JsonNode childrenNode : childrenNodes) { + String childrenId = (childrenNode.get("id") != null) ? childrenNode.get("id").asText() : null; + String childrenName = (childrenNode.get("name") != null) ? childrenNode.get("name").asText() : null; + String childrenCount = (childrenNode.get("count") != null) ? childrenNode.get("count").asText() : null; + String childrenCoverImg = (childrenNode.get("coverImg") != null) ? childrenNode.get("coverImg").asText() : null; + JsonNode courseListNodes = (childrenNode.get("courseList") != null) ? childrenNode.get("courseList") : null; + if (courseListNodes != null && courseListNodes.isArray()) { + for (JsonNode courseListNode : courseListNodes) { + Map jsonMap = new HashMap<>(); + String courseListId = (courseListNode.get("id") != null) ? courseListNode.get("id").asText() : null; + String courseListCategoryId = (courseListNode.get("categoryId") != null) ? courseListNode.get("categoryId").asText() : null; + String courseListTitle = (courseListNode.get("title") != null) ? courseListNode.get("title").asText() : null; + String courseListLable = (courseListNode.get("lable") != null) ? courseListNode.get("lable").asText() : null; + String courseListLang = (courseListNode.get("lang") != null) ? courseListNode.get("lang").asText() : null; + String courseListForm = (courseListNode.get("form") != null) ? courseListNode.get("form").asText() : null; + String courseListVideoType = (courseListNode.get("videoType") != null) ? courseListNode.get("videoType").asText() : null; + String courseListVideoUrl = (courseListNode.get("videoUrl") != null) ? courseListNode.get("videoUrl").asText() : null; + String courseListPlayMin = (courseListNode.get("playMin") != null) ? courseListNode.get("playMin").asText() : null; + jsonMap.put("type", "course"); + jsonMap.put("courseListPlayMin", courseListPlayMin); + jsonMap.put("path", courseListVideoUrl); + jsonMap.put("courseListVideoType", courseListVideoType); + jsonMap.put("courseListForm", courseListForm); + jsonMap.put("lang", courseListLang); + jsonMap.put("courseListLable", courseListLable); + jsonMap.put("title", courseListTitle); + jsonMap.put("courseListCategoryId", courseListCategoryId); + jsonMap.put("courseListId", courseListId); + jsonMap.put("childrenCoverImg", childrenCoverImg); + jsonMap.put("childrenCount", childrenCount); + jsonMap.put("textContent", childrenName); + jsonMap.put("childrenId", childrenId); + jsonMap.put("courseCatalogDesc", courseCatalogDesc); + jsonMap.put("courseCatalogName", courseCatalogName); + jsonMap.put("courseCover", courseCover); + jsonMap.put("courseClasses", courseClasses); + jsonMap.put("courseSeries", courseSeries); + jsonMap.put("courseDescription", courseDescription); + jsonMap.put("courseCatalog", courseCatalog); + jsonMap.put("courseId", courseId); + ansList.add(jsonMap); + } + } + } + } + } + } + } catch (IOException e) { + e.printStackTrace(); + } + return ansList; + } public static Boolean parseHtml(Map jsonMap, String fileContent) { String title = ""; diff --git a/mindspore/src/main/resources/initDoc.sh b/mindspore/src/main/resources/initDoc.sh index 58d7868..ba0cb80 100644 --- a/mindspore/src/main/resources/initDoc.sh +++ b/mindspore/src/main/resources/initDoc.sh @@ -1,13 +1,19 @@ #!/bin/bash SOURCE=/docs-file/source TARGET=/docs-file/target +TARGET_PAPERS=/docs-file/target/papers +TARGET_CASES=/docs-file/target/cases +TARGET_COURSES=/docs-file/target/courses mkdir -p ${SOURCE} mkdir -p ${TARGET} +mkdir -p ${TARGET_PAPERS} +mkdir -p ${TARGET_CASES} +mkdir -p ${TARGET_COURSES} # shellcheck disable=SC2164 cd ${SOURCE} -git clone https://gitee.com/mindspore/website-docs.git +git clone --depth 1 https://gitee.com/mindspore/website-docs.git if [ ! -d "${SOURCE}/website-docs" ]; then rm -rf ${TARGET} @@ -65,4 +71,24 @@ find ./ -name genindex.html |xargs rm -rf # shellcheck disable=SC2038 find ./ -name py-modindex.html |xargs rm -rf # shellcheck disable=SC2038 -find ./ -name unabridged_api.html |xargs rm -rf \ No newline at end of file +find ./ -name unabridged_api.html |xargs rm -rf + +# 从mindspore-portal下载 papers +# shellcheck disable=SC2164 +cd ${SOURCE} + +# git clone --depth 1 https://gitee.com/mindspore/mindspore-portal.git +git clone --depth 1 -b ${gitee_branch} https://${gitee_user}:${gitee_pass}@gitee.com/mindspore/mindspore-portal.git + +if [ ! -d "${SOURCE}/mindspore-portal" ]; then + rm -rf ${TARGET} + exit +fi + + +# shellcheck disable=SC2164 +cd ${SOURCE}/mindspore-portal + +cp -r ${SOURCE}/mindspore-portal/packages/website/data/papers/* ${TARGET_PAPERS}/ +cp -r ${SOURCE}/mindspore-portal/packages/website/data/cases/* ${TARGET_CASES}/ +cp -r ${SOURCE}/mindspore-portal/packages/website/data/courses/* ${TARGET_COURSES}/ \ No newline at end of file