From 18bd85aac68ddddcc5c63b030b7919855a5654ee Mon Sep 17 00:00:00 2001 From: Chad March Date: Wed, 14 Oct 2020 15:03:52 -0800 Subject: [PATCH] Selectively Query Terminal Procedures (#2) --- example.js | 7 ++- index.js | 156 +++++++++++++++++++++++++++++++++++++++++---------- package.json | 1 + 3 files changed, 134 insertions(+), 30 deletions(-) diff --git a/example.js b/example.js index ffed50d..ce66818 100644 --- a/example.js +++ b/example.js @@ -2,7 +2,12 @@ const terminalProcedures = require('./') terminalProcedures.fetchCurrentCycle().then(r => console.log(r)) -terminalProcedures.list('PANC').then(results => { +// Also try updateing the Flag property to include one or more of the following: +// A for only those that were Added since the last effective date +// C for only those that were Changed since the last effective date +// D for only those that were Deleted since the last effective date +// Leave empty to get all regardless of if they've been added or changed +terminalProcedures.list('PANC', { flag: [ 'A', 'C' ], }).then(results => { console.log(results) const out = results.map(tp => { return { diff --git a/index.js b/index.js index e3039b5..466a19c 100644 --- a/index.js +++ b/index.js @@ -2,26 +2,30 @@ const cheerio = require('cheerio') const superagent = require('superagent') const BASE_URL = - 'https://www.faa.gov/air_traffic/flight_info/aeronav/digital_products/dtpp/search/' + 'https://www.faa.gov/air_traffic/flight_info/aeronav/digital_products/dtpp/search' // For some reason the server takes forever to respond without this request header const ACCEPT = 'text/html' +const defaultQueryOptions = { + flag: [], // 'A' - Added, 'C' - Changed, 'D' - Deleted, Empty - All valid procedures +} + /** * A shortcut to the list() method */ -const terminalProcedures = (module.exports = (icaos, options = {}) => { +const terminalProcedures = (module.exports = (icaos, options = defaultQueryOptions) => { return terminalProcedures.list(icaos, options) }) /** * Main fetching method; accepts one or more ICAO codes */ -terminalProcedures.list = (icaos, options = {}) => { +terminalProcedures.list = (icaos, options = defaultQueryOptions) => { if (Array.isArray(icaos)) { - return Promise.all(icaos.map(listOne)) + return Promise.all(icaos.map(icao => listOne(icao, options))) } - return listOne(icaos) + return listOne(icaos, options) } /** @@ -38,31 +42,88 @@ const fetchCurrentCycle = (terminalProcedures.fetchCurrentCycle = async () => { /** * Using the current cycle, fetch the terminal procedures for a single ICAO code + * Optionally request only the Added, Created, Deleted, Added and Created, or All procedures + * @param {String} icao - The Airport Identifier + * @param {Object} options - One or more options to filter the procedures + * @returns {Array} - The scraped terminal procedures */ -const listOne = async icao => { +const listOne = async (icao, options) => { const searchCycle = await fetchCurrentCycle() - let procedures = [] - let lastPageFetched = 0 - let lastNumFetched = 1 - while (lastNumFetched > 0) { - const page = await superagent - .get( - `${BASE_URL}/results/?cycle=${searchCycle}&ident=${icao}&sort=type&dir=asc&page=${lastPageFetched + - 1}` - ) - .set('Accept', ACCEPT) - .then(res => parse(res.text)) - if (page) { - lastNumFetched = page.length - lastPageFetched += 1 - procedures = procedures.concat(page) - } else { - break + + // Build up a base set of query params + let urlParams = [ 'sort=type', 'dir=asc', `ident=${icao}`, ] + // The searchCycle is optional as the API assumes the latest already + // and this function uses the latest cycle + if (searchCycle) { + urlParams.push(`cycle=${searchCycle}`) + } + + // Manage these separately than the base `urlParams` since these + // are used to issue separate requests whereas the `urlParams` are + // applied to every request + let filterFlags = [] + // Validate the flag option first + if (typeof options === 'object' && Array.isArray(options.flag) && options.flag.length) { + for (let f = 0, fLen = options.flag.length; f < fLen; f++) { + switch (options.flag[f].toUpperCase()) { + case 'A': + filterFlags.push(`&filterAdded=1`) + break + case 'C': + filterFlags.push(`&filterChanged=1`) + break + case 'D': + filterFlags.push(`&filterDeleted=1`) + break + default: + // Do nothing and just get them all + filterFlags.push('') + } } } + else { + // Fallback to just getting them all + filterFlags.push('') + } + + // This will be the base url for all requests for all flags for all pages + const procUrl = `${BASE_URL}/results/?${urlParams.join('&')}` + const procedures = [] + + let i, len, _reqUrl + // Loop the flags to start getting the procedures for each flag type + for (i = 0, len = filterFlags.length; i < len; i++) { + // Set up the base req url for this flag type + _reqUrl = `${procUrl}${filterFlags[i]}` + // Issue an initial request without any page param + let { results, pageCount, } = await getProcedures(_reqUrl) + if (results.length) { + // Flatten the results in to the base array that will be returned + procedures.push(...results) + // If there are more than one pages of results + if (pageCount > 1) { + // Set up a loop to fire of subsequent queries for each remaining page, skipping the first page + // since that was already requested + let j + for (j = 2; j < pageCount; j++) { + let { results: _results, pageCount: _pageCount } = await getProcedures(`${_reqUrl}&page=${j}`) + procedures.push(..._results) + if (_pageCount !== pageCount) { + pageCount = _pageCount + } + } + } + } + } + return procedures } +const getProcedures = async url => superagent + .get(url) + .set('Accept', ACCEPT) + .then(res => parse(res.text)) + /** * Parsing helper methods */ @@ -85,13 +146,33 @@ const extractRow = $row => { return null } + const flag = text($row, 6) + let flagExplicit = 'Unchanged' + switch (flag) { + case '': + flagExplicit = 'Unchanged' + break + case 'A': + flagExplicit = 'Added' + break + case 'C': + flagExplicit = 'Changed' + break + case 'D': + flagExplicit = 'Deleted' + break + default: + flagExplicit = 'Unknown edit state' + } + return { state: text($row, 1), city: text($row, 2), airport: text($row, 3), ident: text($row, 4), vol: text($row, 5), - flag: text($row, 6), + flag, + flagExplicit, type, procedure: { name: text($row, 8), @@ -105,24 +186,41 @@ const extractRow = $row => { } /** - * Parse the response HTML into JSON + * Parse the response HTML into JSON + * @param {string} html + * @returns {Object} - The scraped and transformed data and the number of result pages */ const parse = html => { const $ = cheerio.load(html) const $resultsTable = $('#resultsTable') + const noResultsFound = $('.message-box.info').text().trim() + let results = [] + + // As part of the response scraping, scrape the pagination node to get the number of
  • + // nodes that are not the li.arrow nodes for prev next + let pageCount = 0 + const pager = $('#content .pagination li:not(.arrows) a') + if (pager.length) { + pageCount = parseInt(pager[pager.length - 1].attribs.href.split('&page=')[1]) + } - if (!$resultsTable.html()) { + if (!!noResultsFound && noResultsFound === 'No results found.') { + console.warn(noResultsFound) + return { results, pageCount } + } + else if (!$resultsTable.html()) { console.error('Unable to parse the #resultsTable page element') - return null + return { results, pageCount } } - const results = $resultsTable + results = $resultsTable .find('tr') .toArray() .map(row => extractRow($(row))) .filter(x => !!x) if (results.length > 0) { - return results + return { results, pageCount } } + return { results, pageCount } } diff --git a/package.json b/package.json index cb558ad..5c00534 100644 --- a/package.json +++ b/package.json @@ -70,6 +70,7 @@ }, "repository": "fdesjardins/terminal-procedures", "scripts": { + "debug": "node ./example.js", "test": "eslint *.js && nyc mocha test.js" } }