From 4bbb5c7408f070d83f814754c985c34a33b9fe59 Mon Sep 17 00:00:00 2001
From: Copyes
Date: Tue, 4 Apr 2017 22:28:22 +0800
Subject: [PATCH] learn

---
 JSFun/CodeLearning/ajax/ajax.js        |  25 ++++
 JSFun/CodeLearning/ajax/index.html     |  37 +++++++
 JSFun/CodeLearning/spider/dytt.js      |  52 ++++++++
 JSFun/CodeLearning/spider/index.js     | 163 +++++++++++++++++++++++++
 JSFun/CodeLearning/spider/package.json |  18 ++++
 5 files changed, 295 insertions(+)
 create mode 100644 JSFun/CodeLearning/ajax/ajax.js
 create mode 100644 JSFun/CodeLearning/ajax/index.html
 create mode 100644 JSFun/CodeLearning/spider/dytt.js
 create mode 100644 JSFun/CodeLearning/spider/index.js
 create mode 100644 JSFun/CodeLearning/spider/package.json

diff --git a/JSFun/CodeLearning/ajax/ajax.js b/JSFun/CodeLearning/ajax/ajax.js
new file mode 100644
index 0000000..850de1f
--- /dev/null
+++ b/JSFun/CodeLearning/ajax/ajax.js
@@ -0,0 +1,25 @@
+/**
+ * Create an XMLHttpRequest, falling back to the ActiveX objects when
+ * window.XMLHttpRequest is missing.
+ *
+ * @returns {?Object} The request object, or null when none could be created.
+ */
+function getXHR(){
+    var xhr = null;
+
+    if(window.XMLHttpRequest){
+        xhr = new XMLHttpRequest();
+    }else if(window.ActiveXObject){
+        try {
+            xhr = new ActiveXObject("Msxml2.XMLHTTP");
+        }catch(e){
+            try{
+                xhr = new ActiveXObject("Microsoft.XMLHTTP");
+            }catch(e){
+                alert("大兄弟,你的浏览器不支持ajax!");
+            }
+        }
+    }
+
+    return xhr;
+}
diff --git a/JSFun/CodeLearning/ajax/index.html b/JSFun/CodeLearning/ajax/index.html
new file mode 100644
index 0000000..3412ec6
--- /dev/null
+++ b/JSFun/CodeLearning/ajax/index.html
@@ -0,0 +1,37 @@
+
+
+
+
+
+    ajax
+
+
+
+
+
+
+
+
diff --git a/JSFun/CodeLearning/spider/dytt.js b/JSFun/CodeLearning/spider/dytt.js
new file mode 100644
index 0000000..41c16cf
--- /dev/null
+++ b/JSFun/CodeLearning/spider/dytt.js
@@ -0,0 +1,52 @@
+var cheerio = require('cheerio');
+
+var http = require('http');
+
+var iconv = require('iconv-lite');
+
+var url = "http://www.dytt8.net/index.htm";
+
+/**
+ * Fetch the given page, decode it as GB2312 and log the latest movie titles.
+ *
+ * @param {string} url - Address of the index page to scrape.
+ */
+function getMoviesTitle(url){
+    http.get(url, function(res){
+        var chunks = [];
+        res.on('data', function(chunk){
+            chunks.push(chunk);
+        });
+
+        res.on('end', function(){
+            var titles = [];
+
+            // Decode the raw bytes as GB2312 before handing the markup to cheerio.
+            var shtml = iconv.decode(Buffer.concat(chunks), 'gb2312');
+            var $ = cheerio.load(shtml, { decodeEntities: false });
+
+            $(".co_content8 .inddline").each(function(index, element){
+                // Only elements whose width attribute is "85%" are collected.
+                if(element.attribs.width === "85%"){
+                    var str = $(element).text();
+                    var newStr = str.replace(/\r\n/g,'');
+                    titles.push({
+                        title: newStr
+                    });
+                }
+            });
+
+            console.log(titles);
+        });
+    }).on('error', function(err){
+        // Without a listener a failed request is raised as an unhandled 'error' event.
+        console.error('Error: download ' + url + ' err : ' + err.message);
+    });
+}
+
+// TODO: fetch the BT links of the latest movies (not implemented yet).
+function getMovieBt(){
+
+}
+
+getMoviesTitle(url);
diff --git a/JSFun/CodeLearning/spider/index.js b/JSFun/CodeLearning/spider/index.js
new file mode 100644
index 0000000..5e079b2
--- /dev/null
+++ b/JSFun/CodeLearning/spider/index.js
@@ -0,0 +1,163 @@
+'use strict';
+
+let fs = require("fs");
+let cheerio = require("cheerio");
+let async = require("async");
+
+let request = require("superagent");
+require('superagent-charset')(request);
+
+// Basic crawler settings
+const Config = {
+    startPage: 1,
+    endPage: 1, // last page to crawl, inclusive
+    downloadImg: true,
+    downloadConcurrent: 10,
+    currentImgType: "scy" // image category to crawl; must be a key of ImgType
+};
+// List-page URL prefix of each image category
+const ImgType = {
+    ecy: "http://tu.hanhande.com/ecy/ecy_", // anime (2D), total pages: 50
+    scy: "http://tu.hanhande.com/scy/scy_", // real-life (3D), total pages: 64
+    cos: "http://tu.hanhande.com/cos/cos_", // cosplay, total pages: 20
+};
+
+/**
+ * Download a page, decode it as GBK and parse it with cheerio.
+ *
+ * @param {string} url - Address of the page to fetch.
+ * @returns {Promise} Resolves with the cheerio handle of the page, rejects with the request error.
+ */
+let getHtmlAsync = function(url){
+    return new Promise(function(resolve,reject){
+        request.get(url).charset('gbk').end(function(err, res){
+            err ? reject(err) : resolve(cheerio.load(res.text));
+        });
+    });
+};
+
+/**
+ * Collect the albums listed on pages Config.startPage..Config.endPage
+ * (inclusive) of the category selected by Config.currentImgType.
+ *
+ * @returns {Promise<Array>} Resolves with objects of the form
+ *     { title, url, imgList: [] }. A page that cannot be fetched or parsed
+ *     is logged and skipped.
+ */
+let getAlbumsAsync = function(){
+    return new Promise(function(resolve, reject){
+        console.log('start albums');
+        let albums = [];
+        let q = async.queue(async function(url, taskDone){
+            try {
+                let $ = await getHtmlAsync(url);
+
+                console.log(`download ${url} success`);
+
+                $('.picList em a').each(function(index, element){
+                    albums.push({
+                        title: element.children[1].attribs.alt,
+                        url: element.attribs.href,
+                        imgList: []
+                    });
+                });
+
+            } catch(err){
+                console.log(`Error: get album list - download ${url} err : ${err} `);
+            } finally {
+                // NOTE(review): async >= 2.3 is documented to wrap native async workers
+                // and may not pass a callback; only call it when one was supplied.
+                if(typeof taskDone === 'function'){
+                    taskDone();
+                }
+            }
+        }, 10);
+
+        // Called once every queued task has finished
+        q.drain = function(){
+            console.log(`Get album list complete`);
+            resolve(albums);
+        };
+
+        let pageUrls = [];
+        let imageTypeUrl = ImgType[Config.currentImgType];
+        // endPage is inclusive, so startPage === endPage crawls exactly one page.
+        for(let i = Config.startPage; i <= Config.endPage; i++){
+            pageUrls.push(imageTypeUrl + `${i}.shtml`);
+        }
+
+        q.push(pageUrls);
+    });
+};
+
+/**
+ * Fill the imgList of every album with the image URLs found on its page.
+ *
+ * @param {Array} albumList - Objects with url, title and imgList; each imgList is appended to in place.
+ * @returns {Promise<Array>} Resolves with the same albumList once every album has been processed.
+ */
+let getImageListAsync = function(albumList){
+    return new Promise(function(resolve, reject){
+        console.log('start get album`s imgList');
+
+        let q = async.queue(async function({url: albumUrl, title: albumTitle, imgList}, taskDone){
+            try {
+                let $ = await getHtmlAsync(albumUrl);
+                console.log(`get album ${albumTitle} image list done`);
+                $('#picLists img').each(function (idx, element) {
+                    imgList.push(element.attribs.src);
+                });
+            } catch (err) {
+                console.log(`Error :get image list - download ${albumUrl} err : ${err}`);
+            } finally {
+                // NOTE(review): async >= 2.3 is documented to wrap native async workers
+                // and may not pass a callback; only call it when one was supplied.
+                if(typeof taskDone === 'function'){
+                    taskDone(); // this task is finished
+                }
+            }
+        }, 10);
+
+        q.drain = function () {
+            console.log('Get image list complete');
+            resolve(albumList);
+        };
+
+        // Queue every album
+        q.push(albumList);
+    });
+};
+
+/**
+ * Save the album information to two JSON files in the folder
+ * json-<type>-<startPage>-<endPage>: the full list, and a ".min.json"
+ * copy whose image URLs have their first sliceLen characters removed.
+ *
+ * @param {Array} albumList - Albums whose imgList arrays hold image URLs.
+ */
+function writeJsonToFile(albumList){
+    let folder = `json-${Config.currentImgType}-${Config.startPage}-${Config.endPage}`;
+    // mkdirSync throws EEXIST when the folder is left over from an earlier run.
+    if(!fs.existsSync(folder)){
+        fs.mkdirSync(folder);
+    }
+
+    let filePath = `./${folder}/${Config.currentImgType}-${Config.startPage}-${Config.endPage}.json`;
+    fs.writeFileSync(filePath, JSON.stringify(albumList));
+
+    let simpleAlbums = [];
+
+    const sliceLen = "http://www.hanhande.com/upload/".length;
+    albumList.forEach(function({ title:albumTitle, url: albumUrl, imgList}){
+        let imgListTemp = [];
+
+        imgList.forEach(function(url){
+            imgListTemp.push(url.slice(sliceLen));
+        });
+        simpleAlbums.push({ title: albumTitle, url: albumUrl, imgList: imgListTemp});
+    });
+
+    filePath = `./${folder}/${Config.currentImgType}-${Config.startPage}-${Config.endPage}.min.json`;
+
+    fs.writeFileSync(filePath, JSON.stringify(simpleAlbums));
+}
diff --git a/JSFun/CodeLearning/spider/package.json b/JSFun/CodeLearning/spider/package.json
new file mode 100644
index 0000000..1ea5622
--- /dev/null
+++ b/JSFun/CodeLearning/spider/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "spider",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "start": "node index.js"
+  },
+  "author": "copy",
+  "license": "ISC",
+  "dependencies": {
+    "async": "^2.1.5",
+    "cheerio": "^0.22.0",
+    "iconv-lite": "^0.4.15",
+    "superagent": "^3.5.1",
+    "superagent-charset": "^1.1.1"
+  }
+}