forked from zYeee/taobaoSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
zhuye
authored and
zhuye
committed
Dec 29, 2014
0 parents
commit 96b1295
Showing
66 changed files
with
34,547 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<?php
// Shared database bootstrap, pulled in via require_once by the other scripts.
// Exposes two globals: $conn (the mysqli link) and $database (the schema name).
$conn=mysqli_connect("127.0.0.1","taobao","xinxizhuaqu");
if(!$conn){
    // Without a link every later mysqli_* call would fail; stop here with a clear message.
    error_log("MySQL connection failed: ".mysqli_connect_error());
    exit(1);
}
// mysqli_set_charset also tells the client library which charset is in use,
// so mysqli_real_escape_string escapes correctly (a plain "SET NAMES" query does not).
mysqli_set_charset($conn,"utf8");
$database="taobao";
if(!mysqli_select_db($conn,$database)){
    error_log("Cannot select database '$database': ".mysqli_error($conn));
    exit(1);
}
// Autocommit is switched off: every script that writes must issue COMMIT itself.
mysqli_query($conn,"SET AUTOCOMMIT=0");
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
磁动力 | ||
智慧baby | ||
网上无双 | ||
我的时尚部落 | ||
伟伟and颖颖 | ||
幸福慧 | ||
8柠檬茶8 | ||
pzlmilk99 | ||
jiming_neu | ||
zhuoxiaoqing302 | ||
南在南方0812 | ||
小小52066 | ||
亲亲女鞋温州分公司 | ||
xiaobitou | ||
zhaohui0330 | ||
光光de旺铺 | ||
格思美朵女鞋 | ||
小腰精080616 | ||
时尚麦乐 | ||
苏打small | ||
声声不息 | ||
周董_2008hz | ||
夏诺店铺 | ||
心时代淘淘 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<?php
require_once("conn.php");
// Removes from table `50012825` every row whose nick is listed in the file "del"
// (one nick per line, read relative to the current working directory).
$dels=file_get_contents("del");
if($dels===false){
    error_log("Cannot read nick list file 'del'");
    exit(1);
}
// Bind the nick as a parameter so quotes or backslashes in a nick cannot break the SQL.
$stmt=mysqli_prepare($conn,"delete from `50012825` where nick=?");
if(!$stmt){
    error_log("Cannot prepare delete statement: ".mysqli_error($conn));
    exit(1);
}
$nick="";
mysqli_stmt_bind_param($stmt,"s",$nick);
foreach(explode("\n",$dels) as $line){
    // Tolerate CRLF files and skip blank lines instead of stopping at the first one.
    $nick=rtrim($line,"\r");
    if($nick===""){
        continue;
    }
    // Log line only; the statement actually executed is the prepared one above.
    echo "delete from `50012825` where nick='$nick'"."\n";
    mysqli_stmt_execute($stmt);
}
mysqli_stmt_close($stmt);
// conn.php disables autocommit, so the deletes must be committed explicitly.
mysqli_query($conn,"COMMIT");
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
<?php | ||
require_once("prop.php"); | ||
require_once("conn.php"); | ||
/**
 * Fetch a list-search JSON page and return the total number of matching items.
 *
 * The response body is converted from GB18030 to UTF-8 before decoding. The
 * count is read from selectedCondition.totalNumber; when that value contains
 * the character "万" (ten thousand) the leading number is multiplied by 10000.
 *
 * @param string $search_res URL of the search page to request.
 * @return int Item count, or 0 when the page cannot be fetched, converted or
 *             decoded, or carries no usable totalNumber (in which case
 *             "ERROR!!!!!" is echoed).
 */
function calTotal($search_res){
    $ch=curl_init($search_res);
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
    $body=curl_exec($ch);

    $decoded=null;
    if($body!==false){
        $utf8=iconv('GB18030','utf-8',$body);
        if($utf8!==false){
            $decoded=json_decode($utf8,true);
        }
    }
    if(!isset($decoded['selectedCondition']['totalNumber'])
        ||!is_scalar($decoded['selectedCondition']['totalNumber'])){
        // Report the failure and hand callers a count they can safely do arithmetic on.
        echo "ERROR!!!!!";
        return 0;
    }

    $rawTotal=(string)$decoded['selectedCondition']['totalNumber'];
    // Explicit numeric conversion: "1.2万" becomes 1.2 without relying on
    // implicit string-to-number arithmetic.
    $totalNumber=(float)$rawTotal;
    if(strpos($rawTotal,"万")!==false){
        $totalNumber*=10000;
    }
    return (int)round($totalNumber);
}
/**
 * Return the price-bucket label stored with each item row.
 *
 * Buckets are contiguous: every price above one bucket's upper bound falls
 * into the next one, so values such as 100.5 or 101 are labelled "101-200元".
 *
 * @param float $price Item price.
 * @return string Bucket label.
 */
function getinfo_price_range($price){
    $upperBounds=array(
        30=>"0-30元",
        100=>"31-100元",
        200=>"101-200元",
        500=>"201-500元",
        1000=>"501-1000元",
        2000=>"1001-2000元",
    );
    foreach($upperBounds as $limit=>$label){
        if($price<=$limit){
            return $label;
        }
    }
    return "2000元以上";
}

/**
 * Quote a value as an SQL string literal using the connection's escaping.
 *
 * @param mysqli $conn  Open database link.
 * @param mixed  $value Value to embed; it is cast to string first.
 * @return string The escaped value wrapped in single quotes.
 */
function getinfo_quote($conn,$value){
    return "'".mysqli_real_escape_string($conn,(string)$value)."'";
}

/**
 * Crawl one search-result page and store its items.
 *
 * For every entry of the page's itemList a detail document is requested, and
 * one row is queued for the `score` table plus, when the item's categoryId is
 * one of the seven known category tables, one row for that table. All queued
 * rows are written with INSERT IGNORE inside a single transaction; if any
 * statement fails the whole transaction is rolled back.
 *
 * Uses the global $conn database link and the prop class from prop.php.
 *
 * @param string $search_res URL of the search-result page (JSON, GB18030 encoded).
 * @return int Number of rows newly inserted into the category tables;
 *             0 when the page has no usable items or the transaction failed.
 */
function getinfo($search_res){
    global $conn;
    $prop="hws.m.taobao.com/cache/wdetail/5.0/?ttid=2013@taobao_h5_1.0.0";
    // Pending VALUES tuples, one list per category table (table name == category id).
    $categoryRows=array(
        "50012027"=>array(),
        "50012028"=>array(),
        "50012032"=>array(),
        "50012033"=>array(),
        "50012042"=>array(),
        "50012047"=>array(),
        "50012825"=>array(),
    );
    $scoreRows=array();

    $ch=curl_init($search_res);
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,true); // return the body instead of printing it
    $output=curl_exec($ch);
    if($output===false){
        return 0;
    }
    $output=iconv('GB18030','utf-8',$output);
    if($output===false){
        return 0;
    }
    $output=json_decode($output,true);
    if(!isset($output['itemList'])||!is_array($output['itemList'])){
        return 0;
    }

    foreach($output['itemList'] as $child){
        // The item id is whatever follows the first 35 characters of href.
        // NOTE(review): relies on a fixed-length URL prefix - confirm against live data.
        $itemid=substr($child['href'],35);

        // Item detail document (seller, category, properties, stock information).
        $detailCh=curl_init($prop."&id=".$itemid);
        curl_setopt($detailCh,CURLOPT_RETURNTRANSFER,true);
        $detailBody=curl_exec($detailCh);
        $res=($detailBody===false)?null:json_decode($detailBody,true);
        // Items lacking seller credit, category or the apiStack payload are skipped.
        if(!isset($res['data']['seller']['creditLevel'])
            ||!isset($res['data']['itemInfoModel']['categoryId'])
            ||!isset($res['data']['apiStack'][0]['value'])){
            continue;
        }
        // apiStack[0].value is itself a JSON document.
        $data=json_decode($res['data']['apiStack'][0]['value'],true);
        if(!isset($data['data'])){
            continue;
        }

        $seller=$res['data']['seller'];
        $reviewPer=isset($seller['goodRatePercentage'])?$seller['goodRatePercentage']:'';
        $creditLevel=(int)$seller['creditLevel'];

        // Flag the item as new when one of its icons has id "xinpin".
        $isNew=0;
        if(isset($child['icon']['all'])&&is_array($child['icon']['all'])){
            foreach($child['icon']['all'] as $icon){
                if(isset($icon['id'])&&$icon['id']=='xinpin'){
                    $isNew=1;
                    break;
                }
            }
        }
        $isFlag=0;

        // Numeric columns are cast so a malformed value cannot break the statement.
        $price=(float)$child['price'];               // list price
        $currentPrice=(float)$child['currentPrice']; // discounted price
        $stockInfo=isset($data['data']['itemInfoModel'])?$data['data']['itemInfoModel']:array();
        $soldQuantity=isset($stockInfo['totalSoldQuantity'])?(int)$stockInfo['totalSoldQuantity']:0; // units sold
        $quantity=isset($stockInfo['quantity'])?(int)$stockInfo['quantity']:0;

        $item_prop=new prop();
        if(isset($res['data']['props'])&&is_array($res['data']['props'])){
            foreach($res['data']['props'] as $props){
                $item_prop->get_prop($props);
            }
        }

        // Column order must match the category tables' layout.
        $values=array(
            getinfo_quote($conn,str_replace("‘","'",$child['title'])), // title
            $price,
            $currentPrice,
            getinfo_quote($conn,$child['nick']),  // seller nick
            getinfo_quote($conn,$child['loc']),   // seller location
            getinfo_quote($conn,$itemid),
            getinfo_quote($conn,$child['image']), // main image
            $soldQuantity,
            getinfo_quote($conn,$reviewPer),
            $creditLevel,
            $quantity,
            "null",
            $isNew,
            $isFlag,
            getinfo_quote($conn,getinfo_price_range($price)),
        );
        for($i=1;$i<=34;$i++){
            $values[]=getinfo_quote($conn,$item_prop->{'p_'.$i});
        }
        $values[]="null";

        $category=(string)$res['data']['itemInfoModel']['categoryId'];
        if(isset($categoryRows[$category])){
            $categoryRows[$category][]="(".implode(",",$values).")";
        }
        // A score row is queued for every item that passed the checks above,
        // including items whose category has no table of its own.
        $scoreRows[]="(".getinfo_quote($conn,$itemid).")";
    }

    if(count($scoreRows)==0){
        // Nothing to store; avoid sending an INSERT with an empty VALUES list.
        return 0;
    }

    mysqli_query($conn,"BEGIN");
    $ok=(bool)mysqli_query($conn,"insert ignore into score (itemid) values".implode(",",$scoreRows));
    $inserted=0;
    if($ok){
        foreach($categoryRows as $table=>$rows){
            if(count($rows)==0){
                continue;
            }
            if(!mysqli_query($conn,"insert ignore into `$table` values".implode(",",$rows))){
                $ok=false;
                break; // stop at the first failure so nothing partial gets committed
            }
            $inserted+=mysqli_affected_rows($conn);
        }
    }
    if(!$ok){
        mysqli_query($conn,"ROLLBACK");
        return 0;
    }
    mysqli_query($conn,"COMMIT");
    return $inserted;
}
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/php
<?php
// Crawls up to 100 result pages (96 items each) of one category.
// Usage: <script> <cat> [sort] [loc]
if($argc<2||$argv[1]===""){
    // Without a category the crawl would run against an empty "cat=" filter.
    fwrite(STDERR,"usage: ".$argv[0]." <cat> [sort] [loc]\n");
    exit(1);
}
require_once("function.php");
$cat=$argv[1];
// Optional arguments; anything beyond the third one is ignored.
$sort=($argc>=3)?$argv[2]:"";
$loc=($argc>=4)?$argv[3]:"";
$search_res="http://list.taobao.com/itemlist/.htm?json=on&pSize=96&_input_charset=utf-8&cat=".$cat;
if($sort!="")
    $search_res.="&sort=".$sort;
if($loc!="")
    $search_res.="&loc=".$loc;

$total=0;
echo $cat.":".date("Y-m-d H:i:s", time())." ".$sort.$loc." ".memory_get_usage()."begin\n";
// The "s" parameter is the offset of the first item of the requested page.
for($i=0;$i<100;$i++){
    $total+=getinfo($search_res."&s=".($i*96));
}
echo $cat." END ".$total." ".date("Y-m-d H:i:s", time())." ".memory_get_usage()."\n";
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#!/usr/bin/php
<?php
// Fills in the title-evaluation columns of every `score` row whose score is
// still NULL. Rows are processed in batches of up to 100 parallel HTTP POSTs
// to the scoring service; a row whose response has no "desc" entry is deleted.
require_once("conn.php");
$rows=mysqli_query($conn,"select * from score where score is null");
if($rows===false){
    error_log("Cannot read pending score rows: ".mysqli_error($conn));
    exit(1);
}
$uri = "http://112.124.7.124/JZGTitle/MingServlet/titleHome.titleTest";
$batchSize=100;

// Prepared statements keep service-supplied values out of the SQL text.
$update=mysqli_prepare($conn,"update score set evaluation=?,score=?,catPrior=?,wordNum=?,highPVNum=?,highCharNum=?,scoreRate=?,catPriorRate=?,wordRate=?,highPVRate=?,highCharRate=? where itemid=?");
$delete=mysqli_prepare($conn,"delete from score where itemid=?");
if(!$update||!$delete){
    error_log("Cannot prepare score statements: ".mysqli_error($conn));
    exit(1);
}
$evaluation=$score=$catPrior=$wordNum=$highPVNum=$highCharNum=null;
$scoreRate=$catPriorRate=$wordRate=$highPVRate=$highCharRate=$itemid=null;
mysqli_stmt_bind_param($update,"ssssssssssss",$evaluation,$score,$catPrior,$wordNum,$highPVNum,$highCharNum,$scoreRate,$catPriorRate,$wordRate,$highPVRate,$highCharRate,$itemid);
mysqli_stmt_bind_param($delete,"s",$itemid);

$exhausted=false;
while(!$exhausted){
    $mh=curl_multi_init();
    $chs=array();
    // Check the batch size BEFORE fetching, so no fetched row is ever dropped.
    while(count($chs)<$batchSize){
        $row=mysqli_fetch_row($rows);
        if(!$row){
            $exhausted=true;
            break;
        }
        // The first selected column is used as the item id.
        // NOTE(review): depends on the column order of `score` - confirm against the schema.
        $pendingId=$row[0];
        $payload=json_encode(array(
            '_MING_ROOT_' => array(
                'title'=>"http://item.taobao.com/item.htm?id:".$pendingId
            ),
            '_MING_CLASS_'=>"_MING_CLASS_"
        ));
        $ch = curl_init ($uri);
        curl_setopt ( $ch, CURLOPT_POST, 1 );
        curl_setopt ( $ch, CURLOPT_HEADER, 0 );
        curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt ( $ch, CURLOPT_POSTFIELDS, $payload );
        curl_multi_add_handle($mh,$ch);
        $chs[]=array('ch'=>$ch,'itemid'=>$pendingId);
    }

    // Drive all transfers; wait for socket activity instead of spinning the CPU.
    $active=null;
    do{
        $status=curl_multi_exec($mh,$active);
        if($active&&$status==CURLM_OK&&curl_multi_select($mh,1.0)===-1){
            usleep(1000); // select may fail immediately on some platforms; back off briefly
        }
    }while($active&&($status==CURLM_OK||$status==CURLM_CALL_MULTI_PERFORM));

    foreach($chs as $entry){
        $itemid=$entry['itemid'];
        $return=json_decode((string)curl_multi_getcontent($entry['ch']),true);
        curl_multi_remove_handle($mh,$entry['ch']);
        if(!isset($return['desc'])){
            echo "\ndel:$itemid\n";
            mysqli_stmt_execute($delete);
            continue;
        }
        $desc=$return['desc'];
        // Missing fields are stored as NULL rather than breaking the statement.
        $evaluation=isset($desc['desc'])?$desc['desc']:null;
        $score=isset($desc['score'])?$desc['score']:null;
        $catPrior=isset($desc['catPrior'])?$desc['catPrior']:null;
        $wordNum=isset($desc['wordNum'])?$desc['wordNum']:null;
        $highPVNum=isset($desc['highPVNum'])?$desc['highPVNum']:null;
        $highCharNum=isset($desc['highCharNum'])?$desc['highCharNum']:null;
        $scoreRate=isset($desc['scoreRate'])?$desc['scoreRate']:null;
        $catPriorRate=isset($desc['catPriorRate'])?$desc['catPriorRate']:null;
        $wordRate=isset($desc['wordNumRate'])?$desc['wordNumRate']:null;
        $highPVRate=isset($desc['highPVRate'])?$desc['highPVRate']:null;
        $highCharRate=isset($desc['highCharRate'])?$desc['highCharRate']:null;
        mysqli_stmt_execute($update);
        echo "OK";
    }
    curl_multi_close($mh);
    // Autocommit is off (see conn.php): commit the batch's updates AND deletes.
    mysqli_query($conn,"COMMIT");
}
mysqli_stmt_close($update);
mysqli_stmt_close($delete);
mysqli_close($conn);
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/php
<?php
require_once("locnamelist.php");
require_once("/usr/local/app/function.php");

/**
 * Crawl the result pages of one listing URL and add up what getinfo() reports.
 *
 * At most 100 pages of 96 items are requested.
 *
 * @param string $listUrl   Listing URL without the "s" offset parameter.
 * @param mixed  $itemTotal Item count reported for that listing.
 * @return int Sum of the getinfo() results of all requested pages.
 */
function crawl_listing_pages($listUrl,$itemTotal){
    $pageCount=min(100,ceil($itemTotal/96));
    $stored=0;
    for($page=0;$page<$pageCount;$page++){
        $stored+=getinfo($listUrl."&s=".($page*96));
    }
    return $stored;
}

// For each category, walk every province; a province reporting 10000 or more
// items is crawled city by city, otherwise the province listing is crawled directly.
echo date("Y-m-d H:i:s", time())."\n";
$locnames=json_decode($locname,true);
$cats=array("50012027","50012042","50012028","50029451","50027236","50043914","50012047");
$search_res="http://list.taobao.com/itemlist/.htm?json=on&pSize=96&sort=_oldstart&_input_charset=utf-8";
foreach($cats as $cat){
    $categoryUrl=$search_res."&cat=$cat";
    echo $cat.":\n";
    foreach($locnames as $province=>$cities){
        echo $province.":";
        $provinceUrl=$categoryUrl."&loc=$province";
        $provinceTotal=calTotal($provinceUrl);
        $result=0;
        if($provinceTotal<10000){
            $result+=crawl_listing_pages($provinceUrl,$provinceTotal);
        }
        else{
            foreach($cities as $city){
                $cityUrl=$categoryUrl."&loc=$city";
                $result+=crawl_listing_pages($cityUrl,calTotal($cityUrl));
            }
        }
        echo $result."\n";
    }
}
echo date("Y-m-d H:i:s", time())."\n";
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
<?php | ||
$locname='{"北京":["北京"],"上海":["上海","崇明","朱家角"],"天津":["天津"],"重庆":["重庆"],"浙江":["杭州","安吉","慈溪","定海","奉化","海盐","黄岩","湖州","嘉兴","金华","临安","临海","丽水","宁波","瓯海","平湖","千岛湖","衢州","江山","瑞安","绍兴","嵊州","台州","温岭","温州","舟山"],"安徽":["合肥","安庆","蚌埠","亳州","巢湖","滁州","阜阳","贵池","淮北","淮化","淮南","黄山","九华山","六安","马鞍山","宿州","铜陵","屯溪","芜湖","宣城"],"福建":["福州","福安","龙岩","南平","宁德","莆田","泉州","三明","邵武","石狮","永安","武夷山","厦门","漳州"],"甘肃":["兰州","白银","定西","敦煌","甘南","金昌","酒泉","临夏","平凉","天水","武都","武威","西峰","张掖"],"广东":["广州","潮阳","潮州","澄海","东莞","佛山","河源","惠州","江门","揭阳","开平","茂名","梅州","清远","汕头","汕尾","韶关","深圳","顺德","阳江","英德","云浮","增城","湛江","肇庆","中山","珠海"],"广西":["南宁","百色","北海","桂林","防城港","河池","贺州","柳州","钦州","梧州","玉林"],"贵州":["贵阳","安顺","毕节","都匀","凯里","六盘水","铜仁","兴义","玉屏","遵义"],"海南":["海口","儋县","陵水","琼海","三亚","五指山","万宁"],"河北":["石家庄","保定","北戴河","沧州","承德","丰润","邯郸","衡水","廊坊","南戴河","秦皇岛","唐山","新城","邢台","张家口"],"黑龙江":["哈尔滨","北安","大庆","大兴安岭","鹤岗","黑河","佳木斯","鸡西","牡丹江","齐齐哈尔","七台河","双鸭山","绥化","伊春"],"河南":["郑州","安阳","鹤壁","潢川","焦作","济源","开封","漯河","洛阳","南阳","平顶山","濮阳","三门峡","商丘","新乡","信阳","许昌","周口","驻马店"],"湖北":["武汉","恩施","鄂州","黄冈","黄石","荆门","荆州","潜江","十堰","随州","武穴","仙桃","咸宁","襄阳","襄樊","孝感","宜昌"],"湖南":["长沙","常德","郴州","衡阳","怀化","吉首","娄底","邵阳","湘潭","益阳","岳阳","永州","张家界","株洲"],"江苏":["南京","常熟","常州","海门","淮安","江都","江阴","昆山","连云港","南通","启东","沭阳","宿迁","苏州","太仓","泰州","同里","无锡","徐州","盐城","扬州","宜兴","仪征","张家港","镇江","周庄"],"江西":["南昌","抚州","赣州","吉安","景德镇","井冈山","九江","庐山","萍乡","上饶","新余","宜春","鹰潭"],"吉林":["长春","白城","白山","珲春","辽源","梅河","吉林","四平","松原","通化","延吉"],"辽宁":["沈阳","鞍山","本溪","朝阳","大连","丹东","抚顺","阜新","葫芦岛","锦州","辽阳","盘锦","铁岭","营口"],"内蒙古":["呼和浩特","阿拉善盟","包头","赤峰","东胜","海拉尔","集宁","临河","通辽","乌海","乌兰浩特","锡林浩特"],"宁夏":["银川","固源","石嘴山","吴忠"],"青海":["西宁","德令哈","格尔木","共和","海东","海晏","玛沁","同仁","玉树"],"山东":["济南","滨州","兖州","德州","东营","菏泽","济宁","莱芜","聊城","临沂","蓬莱","青岛","曲阜","日照","泰安","潍坊","威海","烟台","枣庄","淄博"],"山西":["太原","长治","大同","候马","晋城","离石","临汾","宁武","朔州","忻州","阳泉","榆次","运城"],"陕西":["西安","安康","宝鸡","汉中","渭南","商州","绥德","铜川","咸阳","延安","榆林"],"四川":["成都","巴中","达州"
,"德阳","都江堰","峨眉山","广安","广元","九寨沟","康定","乐山","泸州","马尔康","绵阳","眉山","南充","内江","攀枝花","遂宁","汶川","西昌","雅安","宜宾","自贡","资阳"],"新疆":["乌鲁木齐","阿克苏","阿勒泰","阿图什","博乐","昌吉","东山","哈密","和田","喀什","克拉玛依","库车","库尔勒","奎屯","石河子","塔城","吐鲁番","伊宁"],"西藏":["拉萨","阿里","昌都","林芝","那曲","日喀则","山南"],"云南":["昆明","大理","保山","楚雄","东川","个旧","景洪","开远","临沧","丽江","六库","潞西","曲靖","思茅","文山","西双版纳","玉溪","中甸","昭通"]}'; | ||
?> |
Oops, something went wrong.