diff --git a/packages/gatsby-source-drupal/README.md b/packages/gatsby-source-drupal/README.md index 99ec215e92909..ce917522cc6fe 100644 --- a/packages/gatsby-source-drupal/README.md +++ b/packages/gatsby-source-drupal/README.md @@ -142,6 +142,26 @@ module.exports = { } ``` +## Concurrent File Requests + +You can use the `concurrentFileRequests` option to change how many simultaneous file requests are made to the server/service. This benefits build speed; however, too many concurrent file requests could cause memory exhaustion depending on the server's memory size, so change with caution. + +```javascript +// In your gatsby-config.js +module.exports = { + plugins: [ + { + resolve: `gatsby-source-drupal`, + options: { + baseUrl: `https://live-contentacms.pantheonsite.io/`, + apiBase: `api`, // optional, defaults to `jsonapi` + concurrentFileRequests: 60, // optional, defaults to `20` + }, + }, + ], +} +``` + ## How to query You can query nodes created from Drupal like the following: diff --git a/packages/gatsby-source-drupal/package.json b/packages/gatsby-source-drupal/package.json index 949aedb4eeca8..877a51d35ffd5 100644 --- a/packages/gatsby-source-drupal/package.json +++ b/packages/gatsby-source-drupal/package.json @@ -11,7 +11,8 @@ "axios": "^0.18.0", "bluebird": "^3.5.0", "gatsby-source-filesystem": "^2.0.33", - "lodash": "^4.17.10" + "lodash": "^4.17.10", + "tiny-async-pool": "^1.0.4" }, "devDependencies": { "@babel/cli": "^7.0.0", diff --git a/packages/gatsby-source-drupal/src/__tests__/index.js b/packages/gatsby-source-drupal/src/__tests__/index.js index d4d615ac07a0b..b9f9b0fa42eba 100644 --- a/packages/gatsby-source-drupal/src/__tests__/index.js +++ b/packages/gatsby-source-drupal/src/__tests__/index.js @@ -17,6 +17,7 @@ jest.mock(`gatsby-source-filesystem`, () => { createRemoteFileNode: jest.fn(), } }) + const { createRemoteFileNode } = require(`gatsby-source-filesystem`) const { sourceNodes } = require(`../gatsby-node`) @@ -28,6 +29,15 @@ 
describe(`gatsby-source-drupal`, () => { const createContentDigest = jest.fn().mockReturnValue(`contentDigest`) const { objectContaining } = expect + const activity = { + start: jest.fn(), + end: jest.fn(), + } + const reporter = { + info: jest.fn(), + activityTimer: jest.fn(() => activity), + } + beforeAll(async () => { const args = { createNodeId, @@ -35,6 +45,7 @@ describe(`gatsby-source-drupal`, () => { actions: { createNode: jest.fn(node => (nodes[node.id] = node)), }, + reporter, } await sourceNodes(args, { baseUrl }) diff --git a/packages/gatsby-source-drupal/src/gatsby-node.js b/packages/gatsby-source-drupal/src/gatsby-node.js index 82dc32ff07c24..5467ab8af45c0 100644 --- a/packages/gatsby-source-drupal/src/gatsby-node.js +++ b/packages/gatsby-source-drupal/src/gatsby-node.js @@ -3,16 +3,32 @@ const _ = require(`lodash`) const { createRemoteFileNode } = require(`gatsby-source-filesystem`) const { URL } = require(`url`) const { nodeFromData } = require(`./normalize`) +const asyncPool = require(`tiny-async-pool`) exports.sourceNodes = async ( - { actions, store, cache, createNodeId, createContentDigest }, - { baseUrl, apiBase, basicAuth, filters, headers, params } + { actions, store, cache, createNodeId, createContentDigest, reporter }, + { + baseUrl, + apiBase, + basicAuth, + filters, + headers, + params, + concurrentFileRequests, + } ) => { const { createNode } = actions + const drupalFetchActivity = reporter.activityTimer(`Fetch data from Drupal`) + const downloadingFilesActivity = reporter.activityTimer( + `Remote file download` + ) // Default apiBase to `jsonapi` apiBase = apiBase || `jsonapi` + // Default concurrentFileRequests to `20` + concurrentFileRequests = concurrentFileRequests || 20 + // Touch existing Drupal nodes so Gatsby doesn't garbage collect them. // _.values(store.getState().nodes) // .filter(n => n.internal.type.slice(0, 8) === `drupal__`) @@ -20,7 +36,7 @@ exports.sourceNodes = async ( // Fetch articles. 
// console.time(`fetch Drupal data`) - console.log(`Starting to fetch data from Drupal`) + reporter.info(`Starting to fetch data from Drupal`) // TODO restore this // let lastFetched @@ -32,6 +48,8 @@ exports.sourceNodes = async ( // .lastFetched // } + drupalFetchActivity.start() + const data = await axios.get(`${baseUrl}/${apiBase}`, { auth: basicAuth, headers, @@ -94,6 +112,8 @@ exports.sourceNodes = async ( }) ) + drupalFetchActivity.end() + // Make list of all IDs so we can check against that when creating // relationships. const ids = {} @@ -187,48 +207,61 @@ exports.sourceNodes = async ( }) }) - // Download all files. - await Promise.all( - nodes.map(async node => { - let fileNode - if ( - node.internal.type === `files` || - node.internal.type === `file__file` - ) { - try { - let fileUrl = node.url - if (typeof node.uri === `object`) { - // Support JSON API 2.x file URI format https://www.drupal.org/node/2982209 - fileUrl = node.uri.url + reporter.info(`Downloading remote files from Drupal`) + downloadingFilesActivity.start() + + // Download all files (await for each pool to complete to fix concurrency issues) + await asyncPool(concurrentFileRequests, nodes, async node => { + // If we have basicAuth credentials, add them to the request. + const auth = + typeof basicAuth === `object` + ? { + htaccess_user: basicAuth.username, + htaccess_pass: basicAuth.password, } - // Resolve w/ baseUrl if node.uri isn't absolute. - const url = new URL(fileUrl, baseUrl) - // If we have basicAuth credentials, add them to the request. - const auth = - typeof basicAuth === `object` - ? 
{ - htaccess_user: basicAuth.username, - htaccess_pass: basicAuth.password, - } - : {} - fileNode = await createRemoteFileNode({ - url: url.href, - store, - cache, - createNode, - createNodeId, - parentNodeId: node.id, - auth, - }) - } catch (e) { - // Ignore - } - if (fileNode) { - node.localFile___NODE = fileNode.id - } + : {} + let fileNode = null + let fileUrl = `` + let url = {} + + if (node.internal.type === `files` || node.internal.type === `file__file`) { + fileUrl = node.url + + // If node.uri is an object + if (typeof node.uri === `object`) { + // Support JSON API 2.x file URI format https://www.drupal.org/node/2982209 + fileUrl = node.uri.url + } + + // Resolve w/ baseUrl if node.uri isn't absolute. + url = new URL(fileUrl, baseUrl) + + // Create the remote file from the given node + try { + fileNode = await createRemoteFileNode({ + url: url.href, + store, + cache, + createNode, + createNodeId, + parentNodeId: node.id, + auth, + }) + } catch (err) { + reporter.error(err) } - }) - ) - nodes.forEach(n => createNode(n)) + // If the fileNode exists set the node ID of the local file + if (fileNode) { + node.localFile___NODE = fileNode.id + } + } + }) + + downloadingFilesActivity.end() + + // Create each node + for (const node of nodes) { + createNode(node) + } } diff --git a/yarn.lock b/yarn.lock index 4b10d9d8d4657..c49107757d05c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -17405,17 +17405,7 @@ react-docgen@^4.1.0: node-dir "^0.1.10" recast "^0.17.3" -react-dom@^16.8.4: - version "16.8.6" - resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-16.8.6.tgz#71d6303f631e8b0097f56165ef608f051ff6e10f" - integrity sha512-1nL7PIq9LTL3fthPqwkvr2zY7phIPjYrT0jp4HjyEQrEROnw4dG41VVwi/wfoCneoleqrNX7iAD+pXebJZwrwA== - dependencies: - loose-envify "^1.1.0" - object-assign "^4.1.1" - prop-types "^15.6.2" - scheduler "^0.13.6" - -react-dom@^16.8.6: +react-dom@^16.8.4, react-dom@^16.8.6: version "16.8.6" resolved 
"https://registry.yarnpkg.com/react-dom/-/react-dom-16.8.6.tgz#71d6303f631e8b0097f56165ef608f051ff6e10f" integrity sha512-1nL7PIq9LTL3fthPqwkvr2zY7phIPjYrT0jp4HjyEQrEROnw4dG41VVwi/wfoCneoleqrNX7iAD+pXebJZwrwA== @@ -17462,17 +17452,7 @@ react-typography@^0.16.18: resolved "https://registry.yarnpkg.com/react-typography/-/react-typography-0.16.18.tgz#89341b63d615f1dfcf5e471797df5acce5bde1f3" integrity sha512-BFkw59H2mkYRLLtHOT2UZve/9/RticAvL7o7rheOo/+tlwQyfF61wF2XzGRcA3IWASg+DbXnNViyywHBFlNj6A== -react@^16.8.4: - version "16.8.6" - resolved "https://registry.yarnpkg.com/react/-/react-16.8.6.tgz#ad6c3a9614fd3a4e9ef51117f54d888da01f2bbe" - integrity sha512-pC0uMkhLaHm11ZSJULfOBqV4tIZkx87ZLvbbQYunNixAAvjnC+snJCg0XQXn9VIsttVsbZP/H/ewzgsd5fxKXw== - dependencies: - loose-envify "^1.1.0" - object-assign "^4.1.1" - prop-types "^15.6.2" - scheduler "^0.13.6" - -react@^16.8.6: +react@^16.8.4, react@^16.8.6: version "16.8.6" resolved "https://registry.yarnpkg.com/react/-/react-16.8.6.tgz#ad6c3a9614fd3a4e9ef51117f54d888da01f2bbe" integrity sha512-pC0uMkhLaHm11ZSJULfOBqV4tIZkx87ZLvbbQYunNixAAvjnC+snJCg0XQXn9VIsttVsbZP/H/ewzgsd5fxKXw== @@ -18742,14 +18722,6 @@ scheduler@^0.13.6: loose-envify "^1.1.0" object-assign "^4.1.1" -scheduler@^0.13.6: - version "0.13.6" - resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.13.6.tgz#466a4ec332467b31a91b9bf74e5347072e4cd889" - integrity sha512-IWnObHt413ucAYKsD9J1QShUKkbKLQQHdxRyw73sw4FN26iWr3DY/H34xGPe4nmL1DwXyWmSWmMrA9TfQbE/XQ== - dependencies: - loose-envify "^1.1.0" - object-assign "^4.1.1" - schema-utils@^0.4.0, schema-utils@^0.4.4, schema-utils@^0.4.5: version "0.4.7" resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-0.4.7.tgz#ba74f597d2be2ea880131746ee17d0a093c68187" @@ -19040,11 +19012,6 @@ shell-escape@^0.2.0: resolved "https://registry.yarnpkg.com/shell-escape/-/shell-escape-0.2.0.tgz#68fd025eb0490b4f567a027f0bf22480b5f84133" integrity sha1-aP0CXrBJC09WegJ/C/IkgLX4QTM= -shell-escape@^0.2.0: - version 
"0.2.0" - resolved "https://registry.yarnpkg.com/shell-escape/-/shell-escape-0.2.0.tgz#68fd025eb0490b4f567a027f0bf22480b5f84133" - integrity sha1-aP0CXrBJC09WegJ/C/IkgLX4QTM= - shell-quote@1.6.1, shell-quote@^1.6.1: version "1.6.1" resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.6.1.tgz#f4781949cce402697127430ea3b3c5476f481767" @@ -20448,6 +20415,14 @@ timsort@^0.3.0: resolved "https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4" integrity sha1-QFQRqOfmM5/mTbmiNN4R3DHgK9Q= +tiny-async-pool@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/tiny-async-pool/-/tiny-async-pool-1.0.4.tgz#bbac28a39a754576d8d0615d4e2ad35c87da6169" + integrity sha512-4gdLvReS3WwmPCxZjj38Go673xhEXlK77fVFA2x+dE2Bf9QzDkVQb3rdO1iJt337ybhir42m4zM2GHJjiuFwoA== + dependencies: + semver "^5.5.0" + yaassertion "^1.0.0" + tiny-emitter@^2.0.0: version "2.0.2" resolved "https://registry.yarnpkg.com/tiny-emitter/-/tiny-emitter-2.0.2.tgz#82d27468aca5ade8e5fd1e6d22b57dd43ebdfb7c" @@ -22340,6 +22315,11 @@ y18n@^3.2.1: resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.0.tgz#95ef94f85ecc81d007c264e190a120f0a3c8566b" integrity sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w== +yaassertion@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/yaassertion/-/yaassertion-1.0.0.tgz#630c5c44c660d064006f1f15d79bd256d373fc9e" + integrity sha512-fepEqRG+/2ZkJBf2ioA4LTOZUWrBN3F2EuKms3zE47M0zqph5aWs6SGiyz9wyzPkowhtiKapHV52IsRBfYCDwA== + yallist@^2.0.0, yallist@^2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52"