Skip to content

Commit

Permalink
fix(gatsby-source-drupal): limit concurrentFileRequests while downloa…
Browse files Browse the repository at this point in the history
…ding remote images (gatsbyjs#13943)

Limit `concurrentFileRequests` while downloading remote files from Drupal. This fixes memory exhaustion caused by making too many requests at once.
  • Loading branch information
bakeruk authored and wardpeet committed May 14, 2019
1 parent ee212ee commit 1f0e0f4
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 80 deletions.
20 changes: 20 additions & 0 deletions packages/gatsby-source-drupal/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,26 @@ module.exports = {
}
```

## Concurrent File Requests

You can use the `concurrentFileRequests` option to change how many simultaneous file requests are made to the server/service. A higher value can speed up builds; however, too many concurrent file requests could cause memory exhaustion depending on the server's memory size, so change it with caution.

```javascript
// In your gatsby-config.js
module.exports = {
plugins: [
{
resolve: `gatsby-source-drupal`,
options: {
baseUrl: `https://live-contentacms.pantheonsite.io/`,
apiBase: `api`, // optional, defaults to `jsonapi`
concurrentFileRequests: 60, // optional, defaults to `20`
},
},
],
}
```

## How to query

You can query nodes created from Drupal like the following:
Expand Down
3 changes: 2 additions & 1 deletion packages/gatsby-source-drupal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
"axios": "^0.18.0",
"bluebird": "^3.5.0",
"gatsby-source-filesystem": "^2.0.33",
"lodash": "^4.17.10"
"lodash": "^4.17.10",
"tiny-async-pool": "^1.0.4"
},
"devDependencies": {
"@babel/cli": "^7.0.0",
Expand Down
11 changes: 11 additions & 0 deletions packages/gatsby-source-drupal/src/__tests__/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jest.mock(`gatsby-source-filesystem`, () => {
createRemoteFileNode: jest.fn(),
}
})

const { createRemoteFileNode } = require(`gatsby-source-filesystem`)

const { sourceNodes } = require(`../gatsby-node`)
Expand All @@ -28,13 +29,23 @@ describe(`gatsby-source-drupal`, () => {
const createContentDigest = jest.fn().mockReturnValue(`contentDigest`)
const { objectContaining } = expect

const activity = {
start: jest.fn(),
end: jest.fn(),
}
const reporter = {
info: jest.fn(),
activityTimer: jest.fn(() => activity),
}

beforeAll(async () => {
const args = {
createNodeId,
createContentDigest,
actions: {
createNode: jest.fn(node => (nodes[node.id] = node)),
},
reporter,
}

await sourceNodes(args, { baseUrl })
Expand Down
121 changes: 77 additions & 44 deletions packages/gatsby-source-drupal/src/gatsby-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,40 @@ const _ = require(`lodash`)
const { createRemoteFileNode } = require(`gatsby-source-filesystem`)
const { URL } = require(`url`)
const { nodeFromData } = require(`./normalize`)
const asyncPool = require(`tiny-async-pool`)

exports.sourceNodes = async (
{ actions, store, cache, createNodeId, createContentDigest },
{ baseUrl, apiBase, basicAuth, filters, headers, params }
{ actions, store, cache, createNodeId, createContentDigest, reporter },
{
baseUrl,
apiBase,
basicAuth,
filters,
headers,
params,
concurrentFileRequests,
}
) => {
const { createNode } = actions
const drupalFetchActivity = reporter.activityTimer(`Fetch data from Drupal`)
const downloadingFilesActivity = reporter.activityTimer(
`Remote file download`
)

// Default apiBase to `jsonapi`
apiBase = apiBase || `jsonapi`

// Default concurrentFileRequests to `20`
concurrentFileRequests = concurrentFileRequests || 20

// Touch existing Drupal nodes so Gatsby doesn't garbage collect them.
// _.values(store.getState().nodes)
// .filter(n => n.internal.type.slice(0, 8) === `drupal__`)
// .forEach(n => touchNode({ nodeId: n.id }))

// Fetch articles.
// console.time(`fetch Drupal data`)
console.log(`Starting to fetch data from Drupal`)
reporter.info(`Starting to fetch data from Drupal`)

// TODO restore this
// let lastFetched
Expand All @@ -32,6 +48,8 @@ exports.sourceNodes = async (
// .lastFetched
// }

drupalFetchActivity.start()

const data = await axios.get(`${baseUrl}/${apiBase}`, {
auth: basicAuth,
headers,
Expand Down Expand Up @@ -94,6 +112,8 @@ exports.sourceNodes = async (
})
)

drupalFetchActivity.end()

// Make list of all IDs so we can check against that when creating
// relationships.
const ids = {}
Expand Down Expand Up @@ -187,48 +207,61 @@ exports.sourceNodes = async (
})
})

// Download all files.
await Promise.all(
nodes.map(async node => {
let fileNode
if (
node.internal.type === `files` ||
node.internal.type === `file__file`
) {
try {
let fileUrl = node.url
if (typeof node.uri === `object`) {
// Support JSON API 2.x file URI format https://www.drupal.org/node/2982209
fileUrl = node.uri.url
reporter.info(`Downloading remote files from Drupal`)
downloadingFilesActivity.start()

// Download all files (await for each pool to complete to fix concurrency issues)
await asyncPool(concurrentFileRequests, nodes, async node => {
// If we have basicAuth credentials, add them to the request.
const auth =
typeof basicAuth === `object`
? {
htaccess_user: basicAuth.username,
htaccess_pass: basicAuth.password,
}
// Resolve w/ baseUrl if node.uri isn't absolute.
const url = new URL(fileUrl, baseUrl)
// If we have basicAuth credentials, add them to the request.
const auth =
typeof basicAuth === `object`
? {
htaccess_user: basicAuth.username,
htaccess_pass: basicAuth.password,
}
: {}
fileNode = await createRemoteFileNode({
url: url.href,
store,
cache,
createNode,
createNodeId,
parentNodeId: node.id,
auth,
})
} catch (e) {
// Ignore
}
if (fileNode) {
node.localFile___NODE = fileNode.id
}
: {}
let fileNode = null
let fileUrl = ``
let url = {}

if (node.internal.type === `files` || node.internal.type === `file__file`) {
fileUrl = node.url

// If node.uri is an object
if (typeof node.uri === `object`) {
// Support JSON API 2.x file URI format https://www.drupal.org/node/2982209
fileUrl = node.uri.url
}

// Resolve w/ baseUrl if node.uri isn't absolute.
url = new URL(fileUrl, baseUrl)

// Create the remote file from the given node
try {
fileNode = await createRemoteFileNode({
url: url.href,
store,
cache,
createNode,
createNodeId,
parentNodeId: node.id,
auth,
})
} catch (err) {
reporter.error(err)
}
})
)

nodes.forEach(n => createNode(n))
// If the fileNode exists set the node ID of the local file
if (fileNode) {
node.localFile___NODE = fileNode.id
}
}
})

downloadingFilesActivity.end()

// Create each node
for (const node of nodes) {
createNode(node)
}
}
50 changes: 15 additions & 35 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -17405,17 +17405,7 @@ react-docgen@^4.1.0:
node-dir "^0.1.10"
recast "^0.17.3"

react-dom@^16.8.4:
version "16.8.6"
resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-16.8.6.tgz#71d6303f631e8b0097f56165ef608f051ff6e10f"
integrity sha512-1nL7PIq9LTL3fthPqwkvr2zY7phIPjYrT0jp4HjyEQrEROnw4dG41VVwi/wfoCneoleqrNX7iAD+pXebJZwrwA==
dependencies:
loose-envify "^1.1.0"
object-assign "^4.1.1"
prop-types "^15.6.2"
scheduler "^0.13.6"

react-dom@^16.8.6:
react-dom@^16.8.4, react-dom@^16.8.6:
version "16.8.6"
resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-16.8.6.tgz#71d6303f631e8b0097f56165ef608f051ff6e10f"
integrity sha512-1nL7PIq9LTL3fthPqwkvr2zY7phIPjYrT0jp4HjyEQrEROnw4dG41VVwi/wfoCneoleqrNX7iAD+pXebJZwrwA==
Expand Down Expand Up @@ -17462,17 +17452,7 @@ react-typography@^0.16.18:
resolved "https://registry.yarnpkg.com/react-typography/-/react-typography-0.16.18.tgz#89341b63d615f1dfcf5e471797df5acce5bde1f3"
integrity sha512-BFkw59H2mkYRLLtHOT2UZve/9/RticAvL7o7rheOo/+tlwQyfF61wF2XzGRcA3IWASg+DbXnNViyywHBFlNj6A==

react@^16.8.4:
version "16.8.6"
resolved "https://registry.yarnpkg.com/react/-/react-16.8.6.tgz#ad6c3a9614fd3a4e9ef51117f54d888da01f2bbe"
integrity sha512-pC0uMkhLaHm11ZSJULfOBqV4tIZkx87ZLvbbQYunNixAAvjnC+snJCg0XQXn9VIsttVsbZP/H/ewzgsd5fxKXw==
dependencies:
loose-envify "^1.1.0"
object-assign "^4.1.1"
prop-types "^15.6.2"
scheduler "^0.13.6"

react@^16.8.6:
react@^16.8.4, react@^16.8.6:
version "16.8.6"
resolved "https://registry.yarnpkg.com/react/-/react-16.8.6.tgz#ad6c3a9614fd3a4e9ef51117f54d888da01f2bbe"
integrity sha512-pC0uMkhLaHm11ZSJULfOBqV4tIZkx87ZLvbbQYunNixAAvjnC+snJCg0XQXn9VIsttVsbZP/H/ewzgsd5fxKXw==
Expand Down Expand Up @@ -18742,14 +18722,6 @@ scheduler@^0.13.6:
loose-envify "^1.1.0"
object-assign "^4.1.1"

scheduler@^0.13.6:
version "0.13.6"
resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.13.6.tgz#466a4ec332467b31a91b9bf74e5347072e4cd889"
integrity sha512-IWnObHt413ucAYKsD9J1QShUKkbKLQQHdxRyw73sw4FN26iWr3DY/H34xGPe4nmL1DwXyWmSWmMrA9TfQbE/XQ==
dependencies:
loose-envify "^1.1.0"
object-assign "^4.1.1"

schema-utils@^0.4.0, schema-utils@^0.4.4, schema-utils@^0.4.5:
version "0.4.7"
resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-0.4.7.tgz#ba74f597d2be2ea880131746ee17d0a093c68187"
Expand Down Expand Up @@ -19040,11 +19012,6 @@ shell-escape@^0.2.0:
resolved "https://registry.yarnpkg.com/shell-escape/-/shell-escape-0.2.0.tgz#68fd025eb0490b4f567a027f0bf22480b5f84133"
integrity sha1-aP0CXrBJC09WegJ/C/IkgLX4QTM=

shell-escape@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/shell-escape/-/shell-escape-0.2.0.tgz#68fd025eb0490b4f567a027f0bf22480b5f84133"
integrity sha1-aP0CXrBJC09WegJ/C/IkgLX4QTM=

[email protected], shell-quote@^1.6.1:
version "1.6.1"
resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.6.1.tgz#f4781949cce402697127430ea3b3c5476f481767"
Expand Down Expand Up @@ -20448,6 +20415,14 @@ timsort@^0.3.0:
resolved "https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4"
integrity sha1-QFQRqOfmM5/mTbmiNN4R3DHgK9Q=

tiny-async-pool@^1.0.4:
version "1.0.4"
resolved "https://registry.yarnpkg.com/tiny-async-pool/-/tiny-async-pool-1.0.4.tgz#bbac28a39a754576d8d0615d4e2ad35c87da6169"
integrity sha512-4gdLvReS3WwmPCxZjj38Go673xhEXlK77fVFA2x+dE2Bf9QzDkVQb3rdO1iJt337ybhir42m4zM2GHJjiuFwoA==
dependencies:
semver "^5.5.0"
yaassertion "^1.0.0"

tiny-emitter@^2.0.0:
version "2.0.2"
resolved "https://registry.yarnpkg.com/tiny-emitter/-/tiny-emitter-2.0.2.tgz#82d27468aca5ade8e5fd1e6d22b57dd43ebdfb7c"
Expand Down Expand Up @@ -22340,6 +22315,11 @@ y18n@^3.2.1:
resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.0.tgz#95ef94f85ecc81d007c264e190a120f0a3c8566b"
integrity sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==

yaassertion@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/yaassertion/-/yaassertion-1.0.0.tgz#630c5c44c660d064006f1f15d79bd256d373fc9e"
integrity sha512-fepEqRG+/2ZkJBf2ioA4LTOZUWrBN3F2EuKms3zE47M0zqph5aWs6SGiyz9wyzPkowhtiKapHV52IsRBfYCDwA==

yallist@^2.0.0, yallist@^2.1.2:
version "2.1.2"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52"
Expand Down

0 comments on commit 1f0e0f4

Please sign in to comment.