-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
253 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
/**
 * Create an XMLHttpRequest object, falling back to the legacy ActiveX
 * implementations exposed by old Internet Explorer versions.
 *
 * @returns {Object|null} A new XHR object, or null when the browser offers
 *     no usable implementation (the user is told via alert() in that case).
 */
function getXHR() {
    var xhr = null;
    // Legacy ActiveX ProgIDs, tried in order of preference.
    var progIds = ["Msxml2.XMLHTTP", "Microsoft.XMLHTTP"];
    var i;

    if (window.XMLHttpRequest) {
        xhr = new window.XMLHttpRequest();
    } else if (window.ActiveXObject) {
        for (i = 0; i < progIds.length && xhr === null; i++) {
            try {
                xhr = new window.ActiveXObject(progIds[i]);
            } catch (ignored) {
                // This ProgID is not registered; fall through to the next one.
            }
        }
    }

    // Notify on every unsupported path, including browsers that expose
    // neither XMLHttpRequest nor ActiveXObject (previously a silent null).
    if (xhr === null) {
        alert("大兄弟,你的浏览器不支持ajax!");
    }

    return xhr;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <title>ajax</title>
</head>

<body>
    <!-- Scripts belong inside <body>; content placed after </body> is invalid HTML. -->
    <script src="./ajax.js"></script>
    <script>
        /**
         * Fire an asynchronous request and log each XHR lifecycle event, so the
         * ordering of readystatechange / loadstart / load can be observed in the console.
         *
         * @param {string} url    Address to request.
         * @param {string} method HTTP method, e.g. 'GET'.
         */
        function ajax(url, method) {
            var xhr = getXHR();
            xhr.onreadystatechange = function() {
                console.log('xhr.readyState:' + this.readyState);
            };
            xhr.onloadstart = function() {
                console.log('onloadStart');
            };
            xhr.onload = function() {
                console.log('onload');
            };
            xhr.open(method, url, true);
            // Header values are strings; a bare 3600 is not a valid Cache-Control
            // directive, so send a proper max-age instead.
            xhr.setRequestHeader('Cache-Control', 'max-age=3600');
            xhr.send();
        }

        // Zero-delay timer, scheduled before the request is started, to compare
        // its log position with the XHR event logs.
        var timer = setTimeout(function() {
            console.log('setTimeout');
        }, 0);

        ajax('http://h0.hucdn.com/open/201712/194b950e0e18b0fd_750x334.jpg', 'GET');
        console.warn('这里的log并不是最先打印出来的.');
    </script>
</body>

</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
var cheerio = require('cheerio'); | ||
|
||
var http = require('http'); | ||
|
||
var iconv = require('iconv-lite'); | ||
|
||
var url = "http://www.dytt8.net/index.htm"; | ||
/**
 * Fetch a page and log the latest movie titles found on it.
 *
 * The response body is decoded as gb2312 and parsed with cheerio; every
 * ".co_content8 .inddline" element whose width attribute is "85%" contributes
 * one title (with CRLF sequences removed).
 *
 * @param {string} url Page to download.
 * @param {function(?Error, Array<{title: string}>=)} [callback] Optional
 *     completion callback; receives the error or the collected titles.
 */
function getMoviesTitle(url, callback){
    // Report the outcome exactly once, to the callback when one was supplied.
    var finished = false;
    function finish(err, titles){
        if (finished) {
            return;
        }
        finished = true;
        if (typeof callback === 'function') {
            callback(err, titles);
        } else if (err) {
            console.error('getMoviesTitle failed: ' + err.message);
        }
    }

    http.get(url, function(res){
        var chunks = [];

        res.on('data', function(chunk){
            chunks.push(chunk);
        });

        // A stream error mid-response would otherwise be an unhandled 'error' event.
        res.on('error', finish);

        res.on('end', function(){
            var titles = [];

            var shtml = iconv.decode(Buffer.concat(chunks), 'gb2312');
            var $ = cheerio.load(shtml, { decodeEntities: false });

            $(".co_content8 .inddline").each(function(index, element){
                // Only the cells with width="85%" are collected as titles.
                if (element.attribs.width === "85%") {
                    titles.push({
                        title: $(element).text().replace(/\r\n/g, '')
                    });
                }
            });

            console.log(titles);
            finish(null, titles);
        });
    }).on('error', finish); // DNS/connection failures must not crash the process
}
// Get the "bt" of the latest movies (presumably BitTorrent download links — TODO confirm). | ||
// Placeholder: the body is empty, so calling it does nothing and returns undefined. | ||
function getMovieBt(){ | ||
|
||
} | ||
|
||
getMoviesTitle(url); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
'use strict' | ||
|
||
let fs = require("fs"); | ||
let cheerio = require("cheerio"); | ||
let async = require("async"); | ||
|
||
let request = require("superagent"); | ||
require('superagent-charset')(request); | ||
|
||
// Basic crawler settings. | ||
// startPage / endPage: list-page numbers that bound the page loop in getAlbumsAsync | ||
// and that are embedded in the output folder and file names by writeJsonToFile. | ||
// downloadImg / downloadConcurrent: not read anywhere in the visible part of this file; | ||
// presumably consumed by an image-download step defined elsewhere — TODO confirm. | ||
const Config = { | ||
startPage: 1, | ||
endPage: 1, | ||
downloadImg: true, | ||
downloadConcurrent: 10, | ||
currentImgType: "scy" // key into ImgType: the image category this crawl targets | ||
}; | ||
// Image categories: each value is a list-page URL prefix; getAlbumsAsync appends | ||
// "<page number>.shtml" to it to build the page URLs. | ||
const ImgType = { | ||
ecy: "http://tu.hanhande.com/ecy/ecy_", // "2D" (anime-style) images, total pages: 50 | ||
scy: "http://tu.hanhande.com/scy/scy_", // "3D" (real-life) images, total pages: 64 | ||
cos: "http://tu.hanhande.com/cos/cos_", // cosplay images, total pages: 20 | ||
} | ||
/**
 * Download a page (decoded as GBK) and hand back its cheerio document.
 *
 * @param {string} url Page to download.
 * @returns {Promise} Resolves with the value of cheerio.load(res.text);
 *     rejects with the request error or with any error thrown while parsing.
 */
let getHtmlAsync = function(url){
    return new Promise(function(resolve, reject){
        request.get(url).charset('gbk').end(function(err, res){
            if (err) {
                reject(err);
                return;
            }
            // The callback runs outside the promise executor, so an exception
            // here would escape and leave the promise pending forever.
            try {
                resolve(cheerio.load(res.text));
            } catch (parseErr) {
                reject(parseErr);
            }
        });
    });
};
|
||
/**
 * Collect the albums listed on every configured list page.
 *
 * Builds one URL per page from Config.startPage through Config.endPage
 * (both inclusive) for the category Config.currentImgType, downloads them
 * through a queue with a concurrency of 10, and gathers one entry per
 * ".picList em a" link.
 *
 * @returns {Promise<Array<{title: string, url: string, imgList: Array}>>}
 *     Resolves once the queue drains. A page that fails to download or parse
 *     is logged and skipped. Rejects when Config.currentImgType is not a key
 *     of ImgType.
 */
let getAlbumsAsync = function(){
    return new Promise(function(resolve, reject){
        console.log('start albums');

        let imageTypeUrl = ImgType[Config.currentImgType];
        if (!imageTypeUrl) {
            // Without this guard every page URL would start with "undefined".
            reject(new Error(`Unknown image type: ${Config.currentImgType}`));
            return;
        }

        let albums = [];
        let q = async.queue(async function(url, taskDone){
            try {
                let $ = await getHtmlAsync(url);

                console.log(`download ${url} success`);

                $('.picList em a').each(function(index, element){
                    albums.push({
                        title: element.children[1].attribs.alt,
                        url: element.attribs.href,
                        imgList: []
                    });
                });
            } catch(err){
                console.log(`Error: get album list - download ${url} err : ${err} `);
            } finally {
                // async versions that detect native async workers await the
                // returned promise and pass no callback, so only call it if given.
                if (typeof taskDone === 'function') {
                    taskDone();
                }
            }
        }, 10);

        // Called once every queued task has finished.
        q.drain = function(){
            console.log(`Get album list complete`);
            resolve(albums);
        };

        let pageUrls = [];
        // endPage is inclusive: with startPage === endPage exactly one page is crawled.
        for(let i = Config.startPage; i <= Config.endPage; i++){
            pageUrls.push(imageTypeUrl + `${i}.shtml`);
        }

        q.push(pageUrls);
    });
};
|
||
/**
 * Fill in the image list of every album.
 *
 * Each album page is downloaded through a queue with a concurrency of 10 and
 * the src of every "#picLists img" element is appended to that album's
 * imgList array (the album objects are mutated in place).
 *
 * @param {Array<{url: string, title: string, imgList: Array<string>}>} albumList
 *     Albums as produced by getAlbumsAsync.
 * @returns {Promise<Array>} Resolves with the same albumList array once the
 *     queue drains. An album that fails to download is logged and left as is.
 */
let getImageListAsync = function(albumList){
    return new Promise(function(resolve, reject){
        console.log('start get album`s imgList');

        let q = async.queue(async function({url: albumUrl, title: albumTitle, imgList}, taskDone){
            try {
                let $ = await getHtmlAsync(albumUrl);
                console.log(`get album ${albumTitle} image list done`);
                $('#picLists img').each(function (idx, element) {
                    imgList.push(element.attribs.src);
                });
            } catch (err) {
                console.log(`Error :get image list - download ${albumUrl} err : ${err}`);
            } finally {
                // async versions that detect native async workers await the
                // returned promise and pass no callback, so only call it if given.
                if (typeof taskDone === 'function') {
                    taskDone(); // this task is finished
                }
            }
        }, 10);

        q.drain = function () {
            console.log('Get image list complete');
            resolve(albumList);
        };

        // Queue every album.
        q.push(albumList);
    });
};
|
||
/**
 * Save the album information to JSON files.
 *
 * Writes two files into the folder
 * "json-<currentImgType>-<startPage>-<endPage>" (created when missing):
 *   - "<type>-<start>-<end>.json":     the albums exactly as given;
 *   - "<type>-<start>-<end>.min.json": the same albums with the first
 *     "http://www.hanhande.com/upload/".length characters cut off every
 *     image URL.
 *
 * @param {Array<{title: string, url: string, imgList: Array<string>}>} albumList
 *     Albums to persist.
 */
function writeJsonToFile(albumList){
    let folder = `json-${Config.currentImgType}-${Config.startPage}-${Config.endPage}`;
    try {
        fs.mkdirSync(folder);
    } catch (err) {
        // Re-running the crawler must not fail just because the folder is already there.
        if (err.code !== 'EEXIST') {
            throw err;
        }
    }

    let filePath = `./${folder}/${Config.currentImgType}-${Config.startPage}-${Config.endPage}.json`;
    fs.writeFileSync(filePath, JSON.stringify(albumList));

    const sliceLen = "http://www.hanhande.com/upload/".length;
    let simpleAlbums = albumList.map(function({ title: albumTitle, url: albumUrl, imgList }){
        return {
            title: albumTitle,
            url: albumUrl,
            imgList: imgList.map(function(url){
                return url.slice(sliceLen);
            })
        };
    });

    filePath = `./${folder}/${Config.currentImgType}-${Config.startPage}-${Config.endPage}.min.json`;

    fs.writeFileSync(filePath, JSON.stringify(simpleAlbums));
}
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"name": "spider", | ||
"version": "1.0.0", | ||
"description": "", | ||
"main": "index.js", | ||
"scripts": { | ||
"start": "node index.js" | ||
}, | ||
"author": "copy", | ||
"license": "ISC", | ||
"dependencies": { | ||
"async": "^2.1.5", | ||
"cheerio": "^0.22.0", | ||
"iconv-lite": "^0.4.15", | ||
"superagent": "^3.5.1", | ||
"superagent-charset": "^1.1.1" | ||
} | ||
} |