Skip to content

Commit

Permalink
Merge pull request #3561 from magda-io/issue/3556
Browse files Browse the repository at this point in the history
Issue/3556  robots.txt should be served as content-type text/plain and other related improvements
  • Loading branch information
t83714 authored Sep 16, 2024
2 parents c3f6c63 + 28d2a00 commit df315bc
Show file tree
Hide file tree
Showing 24 changed files with 606 additions and 493 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- #3559: Set `conflicts` to true when Indexer performs the trim operation.
- Increase indexer client connection idle-timeout to avoid encountering connection reset error for downloading large region files
- Upgraded OpenSearch to v2.16.0
- #3556: Serve robots.txt with content-type `text/plain`, plus other sitemap & crawler view related improvements.

## v4.2.3

Expand Down
2 changes: 1 addition & 1 deletion magda-minion-framework/src/setupRecrawlEndpoint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import Crawler from "./Crawler.js";
/**
* @apiDefine GenericErrorMinionJson
* @apiError (Error 500 JSON Response Body) {Boolean} isSuccess Whether or not the operation is successfully done.
* @apiError (Error 500 JSON Response Body) {Boolean} [isNewCrawler] indicate Whether it's a new cralwer process or existing crawling process is still no-going.
* @apiError (Error 500 JSON Response Body) {Boolean} [isNewCrawler] Indicates whether it's a new crawler process or an existing crawling process that is still on-going.
* @apiError (Error 500 JSON Response Body) {String} errorMessage Free text error message. Only available when `isSuccess`=`false`
* @apiErrorExample {json} Response:
* {
Expand Down
7 changes: 0 additions & 7 deletions magda-web-client/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@
rel="shortcut icon"
href="/api/v0/content/favicon.ico"
/>
<link
rel="sitemap"
type="application/xml"
title="Sitemap"
href="./sitemap"
/>

<!--[if lte IE 9]>
<script type="text/javascript">
"use strict";
Expand Down
21 changes: 18 additions & 3 deletions magda-web-client/src/Components/Dataset/Search/SearchBox.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,25 @@ class SearchBox extends Component {
getSearchBoxValue() {
if (defined(this.state.searchText)) {
return this.state.searchText;
} else if (defined(queryString.parse(this.props.location.search).q)) {
return queryString.parse(this.props.location.search).q;
} else {
const result = queryString.parse(this.props.location.search);
const q = result?.q;
if (typeof q !== "string" || !q.trim()) {
return "";
}
const pathname =
typeof this?.props?.location?.pathname === "string"
? this.props.location.pathname
: "";
if (
/\/search$/.test(pathname) ||
/\/drafts$/.test(pathname) ||
/\/all-datasets$/.test(pathname)
) {
return q;
}
return "";
}
return "";
}

onDismissError() {
Expand Down
10 changes: 10 additions & 0 deletions magda-web-server/src/addRobotsMeta.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
 * Injects a robots `<meta>` tag into an HTML document, immediately before the
 * first `</head>` closing tag.
 *
 * Both attribute values are HTML-escaped before being written, and the
 * insertion is done with a replacer function so that `$` sequences in the
 * arguments (`$$`, `$&`, `$'`, ...) are inserted literally instead of being
 * interpreted as `String.prototype.replace` substitution patterns.
 *
 * @param indexContent - HTML source to modify. Returned unchanged when it
 *   contains no `</head>` tag.
 * @param robotsMetaContent - Value of the tag's `content` attribute.
 *   Defaults to `"noindex"`.
 * @param robotName - Value of the tag's `name` attribute. Defaults to
 *   `"robots"`.
 * @returns The HTML with the meta tag (followed by a newline) inserted before
 *   the first `</head>`.
 */
export default function addRobotsMeta(
    indexContent: string,
    robotsMetaContent: string = "noindex",
    robotName: string = "robots"
): string {
    const metaTag = `<meta name="${escapeHtmlAttribute(
        robotName
    )}" content="${escapeHtmlAttribute(robotsMetaContent)}">`;
    // A function replacer is used on purpose: a string replacement would
    // expand `$`-patterns that happen to appear in the arguments.
    return indexContent.replace("</head>", () => `${metaTag}\n</head>`);
}

/** Escapes the characters that are unsafe inside a double-quoted HTML attribute. */
function escapeHtmlAttribute(value: string): string {
    return value
        .replace(/&/g, "&amp;")
        .replace(/"/g, "&quot;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}
10 changes: 9 additions & 1 deletion magda-web-server/src/buildSitemapRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const DATASET_REQUIRED_ASPECTS = ["dcat-dataset-strings"];

/**
 * Options accepted by `buildSitemapRouter`.
 */
export type SitemapRouterOptions = {
// First path segment passed to `URI.joinPaths` when building sitemap entry URLs.
baseExternalUrl: string;
// Path segment joined directly after `baseExternalUrl` in every generated sitemap URL.
uiBaseUrl: string;
// NOTE(review): presumably the registry client that dataset records are read from — usage is not visible in this excerpt.
registry: Registry;
// NOTE(review): presumably a cache lifetime in seconds — usage is not visible in this excerpt.
cacheSeconds: number;
};
Expand All @@ -22,6 +23,7 @@ let pageTokenQueryTime: number | null = null;

export default function buildSitemapRouter({
baseExternalUrl,
uiBaseUrl,
registry,
cacheSeconds
}: SitemapRouterOptions): express.Router {
Expand Down Expand Up @@ -82,6 +84,7 @@ export default function buildSitemapRouter({
.path(
URI.joinPaths(
baseExternalUrl,
uiBaseUrl,
"sitemap/main.xml"
).toString()
)
Expand All @@ -95,6 +98,7 @@ export default function buildSitemapRouter({
.path(
URI.joinPaths(
baseExternalUrl,
uiBaseUrl,
"sitemap/dataset/afterToken",
token.toString() + ".xml"
).toString()
Expand Down Expand Up @@ -138,7 +142,10 @@ export default function buildSitemapRouter({
});
const dataPromise = streamToPromise(sms);
sms.write({
url: baseExternalUri.toString(),
url: baseExternalUri
.clone()
.path(URI.joinPaths(baseExternalUrl, uiBaseUrl).toString())
.toString(),
changefreq: "daily"
});
sms.end();
Expand Down Expand Up @@ -192,6 +199,7 @@ export default function buildSitemapRouter({
.path(
URI.joinPaths(
baseExternalUrl,
uiBaseUrl,
`/dataset/${encodeURIComponent(record.id)}`
).toString()
)
Expand Down
File renamed without changes.
133 changes: 0 additions & 133 deletions magda-web-server/src/createCralwerViewRouter.ts

This file was deleted.

Loading

0 comments on commit df315bc

Please sign in to comment.