Skip to content

Commit

Permalink
learn
Browse files Browse the repository at this point in the history
  • Loading branch information
Copyes committed Apr 4, 2017
1 parent 1ae4d45 commit 4bbb5c7
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 0 deletions.
19 changes: 19 additions & 0 deletions JSFun/CodeLearning/ajax/ajax.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * Create a request object for the current browser.
 *
 * Uses the native XMLHttpRequest when `window` exposes it; otherwise, when
 * `window.ActiveXObject` exists, tries the two ActiveX ProgIDs in order and
 * alerts the user if neither can be instantiated.
 *
 * @returns {Object|null} the request object, or null when none could be created
 */
function getXHR(){
    if(window.XMLHttpRequest){
        return new XMLHttpRequest();
    }

    if(!window.ActiveXObject){
        return null;
    }

    // Newer ProgID first, legacy one second.
    var progIds = ["Msxml2.XMLHTTP", "Microsoft.XMLHTTP"];
    for(var i = 0; i < progIds.length; i++){
        try{
            return new ActiveXObject(progIds[i]);
        }catch(e){
            // This ProgID is unavailable; fall through to the next candidate.
        }
    }

    alert("大兄弟,你的浏览器不支持ajax!");
    return null;
}
37 changes: 37 additions & 0 deletions JSFun/CodeLearning/ajax/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<title>ajax</title>
</head>

<body>
</body>
<script src="./ajax.js"></script>
<script>

/**
 * Send an asynchronous request and log its lifecycle events to the console.
 *
 * @param {string} url    - address to request
 * @param {string} method - HTTP method passed to xhr.open (e.g. 'GET')
 */
function ajax(url, method) {
    var xhr = getXHR();

    // getXHR() returns null when no request object could be created.
    if (!xhr) {
        console.error('ajax: no XMLHttpRequest implementation available');
        return;
    }

    xhr.onreadystatechange = function() {
        console.log('xhr.readyState:' + this.readyState);
    };
    xhr.onloadstart = function() {
        console.log('onloadStart');
    };
    xhr.onload = function() {
        console.log('onload');
    };

    xhr.open(method, url, true);
    // A bare number is not a valid Cache-Control directive; send max-age instead.
    xhr.setRequestHeader('Cache-Control', 'max-age=3600');
    xhr.send();
}
// Queue a zero-delay timer; its callback can only run after this script block has finished.
var timer = setTimeout(function() {
console.log('setTimeout');
}, 0);

// Kick off the demo request; ajax() (defined above) logs each XHR event it sees.
ajax('http://h0.hucdn.com/open/201712/194b950e0e18b0fd_750x334.jpg', 'GET');
// The message says this log is not the first line printed.
// NOTE(review): presumably because open()/send() already fire handlers synchronously — confirm in a browser console.
console.warn('这里的log并不是最先打印出来的.');
</script>

</html>
44 changes: 44 additions & 0 deletions JSFun/CodeLearning/spider/dytt.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
var cheerio = require('cheerio');

var http = require('http');

var iconv = require('iconv-lite');

// Index page to scrape; handed to getMoviesTitle() at the bottom of this script.
var url = "http://www.dytt8.net/index.htm";
/**
 * Fetch the latest movie titles from a page and print them.
 *
 * Downloads `url`, decodes the body as gb2312, and collects the text of every
 * `.co_content8 .inddline` element whose `width` attribute is "85%". The
 * resulting array of `{ title }` objects is written to the console; nothing
 * is returned.
 *
 * @param {string} url - address of the page to scrape
 */
function getMoviesTitle(url){
    http.get(url, function(res){
        var chunks = [];

        res.on('data', function(chunk){
            chunks.push(chunk);
        });

        res.on('error', function(err){
            console.error('getMoviesTitle: reading the response failed - ' + err.message);
        });

        res.on('end', function(){
            // Decode the raw bytes first: the page is not UTF-8.
            var shtml = iconv.decode(Buffer.concat(chunks), 'gb2312');
            var $ = cheerio.load(shtml, { decodeEntities: false });
            var titles = [];

            $(".co_content8 .inddline").each(function(index, element){
                // Only cells with this width attribute are treated as title cells.
                if(element.attribs.width === "85%"){
                    titles.push({
                        title: $(element).text().replace(/\r\n/g,'')
                    });
                }
            });

            console.log(titles);
        });
    }).on('error', function(err){
        // Without this handler a connection failure is an unhandled 'error' event.
        console.error('getMoviesTitle: request to ' + url + ' failed - ' + err.message);
    });
}
// Fetch the "bt" (presumably BitTorrent) links of the latest movies.
// TODO: not implemented — the body is empty and nothing in the visible source calls it.
function getMovieBt(){

}

// Script entry point: scrape the configured index page when the file is run.
getMoviesTitle(url);
135 changes: 135 additions & 0 deletions JSFun/CodeLearning/spider/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
'use strict'

let fs = require("fs");
let cheerio = require("cheerio");
let async = require("async");

let request = require("superagent");
require('superagent-charset')(request);

// Basic crawler settings.
const Config = {
startPage: 1, // first index page number of the range to crawl
endPage: 1, // last index page number of the range; also used in the output file names
downloadImg: true, // not referenced in the visible part of this file
downloadConcurrent: 10, // not referenced in the visible part of this file
currentImgType: "scy" // image category to crawl; used as a key into ImgType
};
// Image categories: each value is the URL prefix of that category's index pages
// (getAlbumsAsync appends the page number and ".shtml").
const ImgType = {
ecy: "http://tu.hanhande.com/ecy/ecy_", // anime ("2D") images; total pages: 50
scy: "http://tu.hanhande.com/scy/scy_", // real-life ("3D") images; total pages: 64
cos: "http://tu.hanhande.com/cos/cos_", // cosplay images; total pages: 20
}
/**
 * Download a page and hand back its cheerio-parsed document.
 *
 * The response is requested with the 'gbk' charset before parsing.
 *
 * @param {string} url - address of the page to download
 * @returns {Promise} resolves with the result of cheerio.load on the response
 *                    text; rejects with the request error
 */
let getHtmlAsync = function(url){
    return new Promise((resolve, reject) => {
        const onResponse = (err, res) => {
            if(err){
                reject(err);
                return;
            }
            resolve(cheerio.load(res.text));
        };

        request.get(url).charset('gbk').end(onResponse);
    });
};

/**
 * Collect the albums listed on the configured index pages.
 *
 * Builds one URL per page from Config.startPage through Config.endPage
 * (both inclusive) for the category in Config.currentImgType, downloads the
 * pages through a queue with a concurrency of 10, and records every
 * `.picList em a` link as an album. A page that fails to download or parse
 * is logged and skipped; it does not fail the whole run.
 *
 * @returns {Promise<Array<{title: string, url: string, imgList: Array}>>}
 *          resolves with the collected albums once the queue has drained
 */
let getAlbumsAsync = function(){
    return new Promise(function(resolve){
        console.log('start albums');
        let albums = [];

        // Callback-style worker on purpose: recent async 2.x releases treat native
        // `async function` workers specially and do not pass them the completion
        // callback, so a plain function keeps `taskDone` valid on every version.
        let q = async.queue(function(url, taskDone){
            getHtmlAsync(url).then(function($){
                console.log(`download ${url} success`);

                $('.picList em a').each(function(index, element){
                    albums.push({
                        title: element.children[1].attribs.alt,
                        url: element.attribs.href,
                        imgList: []
                    });
                });
            }).catch(function(err){
                console.log(`Error: get album list - download ${url} err : ${err} `);
            }).then(function(){
                // Always release the queue slot, whether the page succeeded or not.
                taskDone();
            });
        }, 10);

        // Called by the queue once every queued page has been processed.
        q.drain = function(){
            console.log(`Get album list complete`);
            resolve(albums);
        };

        let pageUrls = [];
        let imageTypeUrl = ImgType[Config.currentImgType];
        // endPage is inclusive: a range of 1..1 must still crawl page 1.
        for(let i = Config.startPage; i <= Config.endPage; i++){
            pageUrls.push(`${imageTypeUrl}${i}.shtml`);
        }

        q.push(pageUrls);
    });
};

/**
 * Fill in the image list of every album.
 *
 * Downloads each album page through a queue with a concurrency of 10 and
 * appends the `src` of every `#picLists img` element to that album's
 * `imgList` array (the albums are mutated in place). An album whose page
 * fails to download or parse is logged and left with whatever it had.
 *
 * @param {Array<{url: string, title: string, imgList: Array}>} albumList
 *        albums to process
 * @returns {Promise<Array>} resolves with the same `albumList` array once
 *          the queue has drained
 */
let getImageListAsync = function(albumList){
    return new Promise(function(resolve){
        console.log('start get album`s imgList');

        // Callback-style worker on purpose: recent async 2.x releases treat native
        // `async function` workers specially and do not pass them the completion
        // callback, so a plain function keeps `taskDone` valid on every version.
        let q = async.queue(function({url: albumUrl, title: albumTitle, imgList}, taskDone){
            getHtmlAsync(albumUrl).then(function($){
                console.log(`get album ${albumTitle} image list done`);
                $('#picLists img').each(function(idx, element){
                    imgList.push(element.attribs.src);
                });
            }).catch(function(err){
                console.log(`Error :get image list - download ${albumUrl} err : ${err}`);
            }).then(function(){
                // One task finished; always release the queue slot.
                taskDone();
            });
        }, 10);

        // Called by the queue once every album has been processed.
        q.drain = function(){
            console.log('Get image list complete');
            resolve(albumList);
        };

        // Queue every album as a task.
        q.push(albumList);
    });
};

/**
 * Save the album information to JSON files.
 *
 * Writes two files into `json-<type>-<start>-<end>/` (relative to the current
 * working directory), named from Config.currentImgType, Config.startPage and
 * Config.endPage:
 *   - `<type>-<start>-<end>.json`     the albums exactly as given
 *   - `<type>-<start>-<end>.min.json` the same albums with the first
 *     "http://www.hanhande.com/upload/".length characters cut from every
 *     image URL
 *
 * @param {Array<{title: string, url: string, imgList: string[]}>} albumList
 *        albums to serialise
 * @throws {Error} any file-system error other than the folder already existing
 */
function writeJsonToFile(albumList){
    const baseName = `${Config.currentImgType}-${Config.startPage}-${Config.endPage}`;
    const folder = `json-${baseName}`;

    // Re-running the crawler must not fail just because the folder is already there.
    try {
        fs.mkdirSync(folder);
    } catch(err){
        if(err.code !== 'EEXIST'){
            throw err;
        }
    }

    fs.writeFileSync(`./${folder}/${baseName}.json`, JSON.stringify(albumList));

    // Length of the common upload prefix that is cut from each image URL.
    const sliceLen = "http://www.hanhande.com/upload/".length;
    const simpleAlbums = albumList.map(function({ title: albumTitle, url: albumUrl, imgList }){
        return {
            title: albumTitle,
            url: albumUrl,
            imgList: imgList.map(function(imgUrl){
                return imgUrl.slice(sliceLen);
            })
        };
    });

    fs.writeFileSync(`./${folder}/${baseName}.min.json`, JSON.stringify(simpleAlbums));
}







18 changes: 18 additions & 0 deletions JSFun/CodeLearning/spider/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "spider",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"author": "copy",
"license": "ISC",
"dependencies": {
"async": "^2.1.5",
"cheerio": "^0.22.0",
"iconv-lite": "^0.4.15",
"superagent": "^3.5.1",
"superagent-charset": "^1.1.1"
}
}

0 comments on commit 4bbb5c7

Please sign in to comment.