diff --git a/lib/assets/sitemapper.d.ts b/lib/assets/sitemapper.d.ts new file mode 100644 index 0000000..bad9111 --- /dev/null +++ b/lib/assets/sitemapper.d.ts @@ -0,0 +1,201 @@ +/** + * Sitemap Parser + * + * Copyright (c) 2020 Sean Thomas Burke + * Licensed under the MIT license. + * @author Sean Burke <@seantomburke> + */ +/// +import { SitemapperOptions } from '../../sitemapper'; +/** + * @typedef {Object} Sitemapper + */ +export default class Sitemapper { + url: string; + timeout: number; + timeoutTable: Object; + requestHeaders: any; + debug: boolean; + /** + * Construct the Sitemapper class + * + * @params {Object} options to set + * @params {string} [options.url] - the Sitemap url (e.g https://wp.seantburke.com/sitemap.xml) + * @params {Timeout} [options.timeout] - @see {timeout} + * + * @example let sitemap = new Sitemapper({ + * url: 'https://wp.seantburke.com/sitemap.xml', + * timeout: 15000 + * }); + */ + constructor(options?: SitemapperOptions); + /** + * Gets the sites from a sitemap.xml with a given URL + * + * @public + * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml) + * @returns {Promise} + * @example sitemapper.fetch('example.xml') + * .then((sites) => console.log(sites)); + */ + fetch(url?: string): Promise<{ + url: string; + sites: string[]; + }>; + /** + * Get the timeout + * + * @example console.log(sitemapper.timeout); + * @returns {Timeout} + */ + static get timeout(): Number; + /** + * Set the timeout + * + * @public + * @param {Timeout} duration + * @example sitemapper.timeout = 15000; // 15 seconds + */ + static set timeout(duration: Number); + /** + * + * @param {string} url - url for making requests. Should be a link to a sitemaps.xml + * @example sitemapper.url = 'https://wp.seantburke.com/sitemap.xml' + */ + static set url(url: string); + /** + * Get the url to parse + * @returns {string} + * @example console.log(sitemapper.url) + */ + static get url(): string; + /** + * Setter for the debug state + * @param {Boolean} option - set whether to show debug logs in output. + * @example sitemapper.debug = true; + */ + static set debug(option: boolean); + /** + * Getter for the debug state + * @returns {Boolean} + * @example console.log(sitemapper.debug) + */ + static get debug(): boolean; + /** + * Requests the URL and uses parsestringPromise to parse through and find the data + * + * @private + * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml) + * @returns {Promise} + */ + parse(url?: string): Promise<{ + error: any; + data: any; + }>; + /** + * Timeouts are necessary for large xml trees. This will cancel the call if the request is taking + * too long, but will still allow the promises to resolve. + * + * @private + * @param {string} url - url to use as a hash in the timeoutTable + * @param {Promise} requester - the promise that creates the web request to the url + */ + initializeTimeout(url: string, requester: { + cancel: Function; + }): void; + /** + * Recursive function that will go through a sitemaps tree and get all the sites + * + * @private + * @recursive + * @param {string} url - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml) + * @returns {Promise | Promise} + */ + crawl(url: string): Promise>; + /** + * Gets the sites from a sitemap.xml with a given URL + * + * @deprecated + * @param {string} url - url to query + * @param {getSitesCallback} callback - callback for sites and error + * @callback + */ + getSites(url: string, callback: any): Promise; + /** + * Check to see if the url is a gzipped url + * + * @param {string} url - url to query + * @returns {Boolean} + */ + isGzip(url: string): boolean; + /** + * Decompress the gzipped response body using zlib.gunzip + * + * @param {Buffer} body - body of the gzipped file + * @returns {Boolean} + */ + decompressResponseBody(body: Buffer): Promise; +} +/** + * Callback for the getSites method + * + * @callback getSitesCallback + * @param {Object} error - error from callback + * @param {Array} sites - an Array of sitemaps + */ +/** + * Timeout in milliseconds + * + * @typedef {Number} Timeout + * the number of milliseconds before all requests timeout. The promises will still resolve so + * you'll still receive parts of the request, but maybe not all urls + * default is 15000 which is 15 seconds + */ +/** + * Resolve handler type for the promise in this.parse() + * + * @typedef {Object} ParseData + * + * @property {Error} error that either comes from `parsestringPromise` or `got` or custom error + * @property {Object} data + * @property {string} data.url - URL of sitemap + * @property {Array} data.urlset - Array of returned URLs + * @property {string} data.urlset.url - single Url + * @property {Object} data.sitemapindex - index of sitemap + * @property {string} data.sitemapindex.sitemap - Sitemap + * @example { + * error: "There was an error!" + * data: { + * url: 'https://linkedin.com', + * urlset: [{ + * url: 'https://www.linkedin.com/project1' + * },[{ + * url: 'https://www.linkedin.com/project2' + * }] + * } + * } + */ +/** + * Resolve handler type for the promise in this.parse() + * + * @typedef {Object} SitesData + * + * @property {string} url - the original url used to query the data + * @property {SitesArray} sites + * @example { + * url: 'https://linkedin.com/sitemap.xml', + * sites: [ + * 'https://linkedin.com/project1', + * 'https://linkedin.com/project2' + * ] + * } + */ +/** + * An array of urls + * + * @typedef {string[]} SitesArray + * @example [ + * 'https://www.google.com', + * 'https://www.linkedin.com' + * ] + */ diff --git a/lib/assets/sitemapper.js b/lib/assets/sitemapper.js index 1b35266..9fa56b5 100644 --- a/lib/assets/sitemapper.js +++ b/lib/assets/sitemapper.js @@ -1,2 +1,2 @@ -"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;var _xml2js=require("xml2js"),_got=_interopRequireDefault(require("got")),_zlib=_interopRequireDefault(require("zlib")),_url=_interopRequireDefault(require("url")),_path=_interopRequireDefault(require("path"));function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}function asyncGeneratorStep(a,b,c,d,e,f,g){try{var h=a[f](g),i=h.value}catch(a){return void c(a)}h.done?b(i):Promise.resolve(i).then(d,e)}function _asyncToGenerator(a){return function(){var b=this,c=arguments;return new Promise(function(d,e){function f(a){asyncGeneratorStep(h,d,e,f,g,"next",a)}function g(a){asyncGeneratorStep(h,d,e,f,g,"throw",a)}var h=a.apply(b,c);f(void 0)})}}class Sitemapper{constructor(a){var b=a||{requestHeaders:{}};this.url=b.url,this.timeout=b.timeout||15e3,this.timeoutTable={},this.requestHeaders=b.requestHeaders,this.debug=b.debug}fetch(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0b.cancel(),this.timeout)}crawl(a){var b=this;return _asyncToGenerator(function*(){try{var{error:g,data:h}=yield b.parse(a);if(clearTimeout(b.timeoutTable[a]),g)return b.debug&&console.error("Error occurred during \"crawl('".concat(a,"')\":\n\r Error: ").concat(g)),[];if(h&&h.urlset&&h.urlset.url){b.debug&&console.debug("Urlset found during \"crawl('".concat(a,"')\""));var i=h.urlset.url.map(a=>a.loc&&a.loc[0]);return[].concat(i)}if(h&&h.sitemapindex){b.debug&&console.debug("Additional sitemap found during \"crawl('".concat(a,"')\""));var c=h.sitemapindex.sitemap.map(a=>a.loc&&a.loc[0]),d=c.map(a=>b.crawl(a)),e=yield Promise.all(d),f=e.filter(a=>!a.error).reduce((a,b)=>a.concat(b),[]);return f}return b.debug&&console.error("Unknown state during \"crawl('".concat(a,")'\":"),g,h),[]}catch(a){b.debug&&b.debug&&console.error(a)}})()}getSites(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0{var d=Buffer.from(a);_zlib.default.gunzip(d,(a,d)=>{a?c(a):b(d)})})}}exports.default=Sitemapper,module.exports=exports.default,module.exports.default=exports.default; +"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;var _xml2js=require("xml2js"),_got=_interopRequireDefault(require("got")),_zlib=_interopRequireDefault(require("zlib")),_url=_interopRequireDefault(require("url")),_path=_interopRequireDefault(require("path")),_buffer=require("buffer");function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}function asyncGeneratorStep(a,b,c,d,e,f,g){try{var h=a[f](g),i=h.value}catch(a){return void c(a)}h.done?b(i):Promise.resolve(i).then(d,e)}function _asyncToGenerator(a){return function(){var b=this,c=arguments;return new Promise(function(d,e){function f(a){asyncGeneratorStep(h,d,e,f,g,"next",a)}function g(a){asyncGeneratorStep(h,d,e,f,g,"throw",a)}var h=a.apply(b,c);f(void 0)})}}class Sitemapper{constructor(a){var b=a||{requestHeaders:{}};this.url=b.url,this.timeout=b.timeout||15e3,this.timeoutTable={},this.requestHeaders=b.requestHeaders,this.debug=b.debug}fetch(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0b.cancel(),this.timeout)}crawl(a){var b=this;return _asyncToGenerator(function*(){try{var{error:g,data:h}=yield b.parse(a);if(clearTimeout(b.timeoutTable[a]),g)return b.debug&&console.error("Error occurred during \"crawl('".concat(a,"')\":\n\r Error: ").concat(g)),[];if(h&&h.urlset&&h.urlset.url){b.debug&&console.debug("Urlset found during \"crawl('".concat(a,"')\""));var i=h.urlset.url.map(a=>a.loc&&a.loc[0]);return[].concat(i)}if(h&&h.sitemapindex){b.debug&&console.debug("Additional sitemap found during \"crawl('".concat(a,"')\""));var c=h.sitemapindex.sitemap.map(a=>a.loc&&a.loc[0]),d=c.map(a=>b.crawl(a)),e=yield Promise.all(d),f=e.filter(a=>!a.error).reduce((a,b)=>a.concat(b),[]);return f}return b.debug&&console.error("Unknown state during \"crawl('".concat(a,")'\":"),g,h),[]}catch(a){b.debug&&b.debug&&console.error(a)}})()}getSites(){var a=arguments,b=this;return _asyncToGenerator(function*(){var c=0{var d=_buffer.Buffer.from(a);_zlib.default.gunzip(d,(a,d)=>{a?c(a):b(d)})})}}exports.default=Sitemapper,module.exports=exports.default,module.exports.default=exports.default; //# sourceMappingURL=sitemapper.js.map \ No newline at end of file diff --git a/lib/examples/google.js b/lib/examples/google.js deleted file mode 100644 index ea2caa7..0000000 --- a/lib/examples/google.js +++ /dev/null @@ -1,2 +0,0 @@ -"use strict";var _sitemapper=_interopRequireDefault(require("../assets/sitemapper.js"));function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}var Google=new _sitemapper.default({url:"https://www.google.com/work/sitemap.xml",debug:!1,timeout:15e3});Google.fetch().then(a=>console.log(a.sites)).catch(a=>console.log(a)); -//# sourceMappingURL=google.js.map \ No newline at end of file diff --git a/lib/examples/index.js b/lib/examples/index.js deleted file mode 100644 index 4d45219..0000000 --- a/lib/examples/index.js +++ /dev/null @@ -1,2 +0,0 @@ -"use strict";var _sitemapper=_interopRequireDefault(require("../assets/sitemapper"));function _interopRequireDefault(a){return a&&a.__esModule?a:{default:a}}function asyncGeneratorStep(a,b,c,d,e,f,g){try{var h=a[f](g),i=h.value}catch(a){return void c(a)}h.done?b(i):Promise.resolve(i).then(d,e)}function _asyncToGenerator(a){return function(){var b=this,c=arguments;return new Promise(function(d,e){function f(a){asyncGeneratorStep(h,d,e,f,g,"next",a)}function g(a){asyncGeneratorStep(h,d,e,f,g,"throw",a)}var h=a.apply(b,c);f(void 0)})}}var exampleURL="https://www.walmart.com/sitemap_topic.xml",sitemapper=new _sitemapper.default({url:"https://www.walmart.com/sitemap_topic.xml",debug:!0,timeout:1});_asyncToGenerator(function*(){try{var a=yield sitemapper.fetch();console.log(a)}catch(a){console.error(a)}})(); -//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/package.json b/package.json index 4695ac8..e95cac3 100644 --- a/package.json +++ b/package.json @@ -18,8 +18,7 @@ }, "license": "MIT", "files": [ - "lib", - "sitemapper.d.ts" + "lib" ], "main": "./lib/assets/sitemapper.js", "types": "./sitemapper.d.ts", @@ -33,12 +32,13 @@ "url": "http://www.seantburke.com" }, "scripts": { - "compile": "babel src -d lib -s && tsc --project ./src/tests/", + "compile": "tsc -d --sourceMap --outDir lib -t esnext ./src/**/*.ts && babel lib -d lib -s", "build": "npm run clean && npm run compile", "start": "npm run build && node lib/examples/index.js", "test": "npm run build && mocha ./lib/tests/*.js && npm run lint", "lint": "eslint src", "clean": "rm -rf lib", + "tsc": "tsc", "docs": "documentation build ./src/assets/sitemapper.js -f md > docs.md" }, "maintainers": [ diff --git a/src/assets/sitemapper.ts b/src/assets/sitemapper.ts index 5ec88b5..8f4bb1b 100644 --- a/src/assets/sitemapper.ts +++ b/src/assets/sitemapper.ts @@ -7,13 +7,15 @@ */ import { parseStringPromise } from 'xml2js'; -import got, { Headers, OptionsOfTextResponseBody } from 'got'; +// @ts-ignore +import got from 'got'; +// @ts-ignore import zlib from 'zlib'; +// @ts-ignore import Url from 'url'; +// @ts-ignore import path from 'path'; import { SitemapperOptions, SitemapperResponse} from '../../sitemapper'; -import { ErrorCallback } from 'typescript'; -import { Response } from 'got'; import { Buffer } from 'buffer'; /** @@ -38,7 +40,7 @@ export default class Sitemapper { * timeout: 15000 * }); */ - constructor(options: SitemapperOptions) { + constructor(options?: SitemapperOptions) { const settings: SitemapperOptions = options || { requestHeaders: {}}; this.url = settings.url; this.timeout = settings.timeout || 15000; @@ -294,7 +296,7 @@ export default class Sitemapper { * @param {string} url - url to query * @returns {Boolean} */ - isGzip(url) { + isGzip(url: string) { const parsed = Url.parse(url); const ext = path.extname(parsed.path); return ext === '.gz'; diff --git a/src/tests/test.js b/src/tests/test.js index d48943f..9559500 100644 --- a/src/tests/test.js +++ b/src/tests/test.js @@ -51,8 +51,8 @@ describe('Sitemapper', function () { }); it('should set url', () => { - sitemapper.url = 1000; - sitemapper.url.should.equal(1000); + sitemapper.url = 'https://wp.seantburke.com/sitemap.xml'; + sitemapper.url.should.equal('https://wp.seantburke.com/sitemap.xml'); }); }); diff --git a/src/tests/test.ts.ts b/src/tests/test.ts.ts index 488a430..89f4f8b 100644 --- a/src/tests/test.ts.ts +++ b/src/tests/test.ts.ts @@ -1,7 +1,7 @@ import 'async'; import 'assert'; import 'should'; -import isUrl = require('is-url'); +const isUrl = require('is-url'); // @ts-ignore import Sitemapper from '../../lib/assets/sitemapper.js'; @@ -52,8 +52,8 @@ describe('Sitemapper', function () { }); it('should set url', () => { - sitemapper.url = 1000; - sitemapper.url.should.equal(1000); + sitemapper.url = 'https://wp.seantburke.com/sitemap.xml'; + sitemapper.url.should.equal('https://wp.seantburke.com/sitemap.xml'); }); });