Skip to content

Commit

Permalink
开始重新启动webBee!
Browse files Browse the repository at this point in the history
开始重新启动webBee!
  • Loading branch information
nonacosa committed May 21, 2018
1 parent 361ee72 commit 7d723d8
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 3 deletions.
51 changes: 51 additions & 0 deletions webBee-core/src/test/java/example/BaiduSearchByUrl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package example; /**
* Created by zhuang on 2017/3/23.
*/



import org.bee.webBee.Bee;
import org.bee.webBee.linker.Page;
import org.bee.webBee.processor.PageProcessor;
import org.bee.webBee.processor.Setting;


/**
 * Example crawler that requests a Baidu search URL and prints the page body.
 *
 * <p>A crawler is defined in a servlet-like fashion: much as a servlet extends
 * {@code HttpServlet} and overrides {@code doGet}/{@code doPost}, a crawler
 * implements {@link PageProcessor} and overrides {@link #process(Page)} and
 * {@link #getSetting()}.
 *
 * <p>date 2017-03-23 01:19
 * <br>E-mail [email protected]
 *
 * @author sis.nonacosa
 */

public class BaiduSearchByUrl implements PageProcessor {

    /** Most recently built crawl configuration; rebuilt on every {@link #getSetting()} call. */
    private Setting setting;

    /**
     * Handles a fetched page by printing the JSON string of its {@code body} selection
     * to standard output.
     *
     * @param page the fetched page handed over by the framework
     */
    @Override
    public void process(Page page) {
        // TODO: page.getJson/html/string().$('textarea.content').as('content').build().$('#img').as('img')
        // TODO: expected result: {content:[],img:[]} -- JSON using {} for a single item and [] for several
        // TODO: page.nextUrl('span>ss>s')
        // TODO: fetch API endpoints directly
        // NOTE(review): "$" presumably takes a CSS-style selector -- confirm against the Html API.
        String json = page.getHtml().$("body").toJSONString();
        System.out.println(json);
    }

    /**
     * Builds the crawl configuration for this example: the start URL, the domain
     * and the HTTP method.
     *
     * @return the freshly built setting (also stored in the {@code setting} field)
     */
    @Override
    public Setting getSetting() {
        // Fixed: the message used to name example.MainDemoByUrl (copy-paste leftover).
        System.out.println("This is example.BaiduSearchByUrl's setting function ...");
        setting = Setting.create().setStartUrl("https://www.baidu.com/s?wd=666666");
        // To simulate a logged-in session, attach cookies with
        // setting.addHeader("Cookie", "...") or setting.addCookie(key, value).
        // Do not commit real session cookies to source control.
        setting = setting.setDomain("baidu.com");
        setting = setting.setHttpMethod("GET");
        return setting;
    }

    /**
     * Runs the example crawler.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Bee.create(new BaiduSearchByUrl()).run();
    }
}
4 changes: 2 additions & 2 deletions webBee-core/src/test/java/example/MainDemoByApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ public Setting getSetting() {
setting = setting.addHeader("Accept-Language","zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4");
setting = setting.addHeader("Cache-Control","max-age=0");
setting = setting.addHeader("Connection","keep-alive");
setting = setting.addHeader("Host","www.zhihu.com");
setting = setting.addHeader("Host","zhihu.com");
setting = setting.addHeader("Upgrade-Insecure-Requests","1");
setting = setting.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
setting = setting.addHeader("Cookie","d_c0=\"AHBCTk4QowuPTs0xoWv4_K0tdVn73ZvN2EI=|1492701580\"; _zap=9632bb9d-c70d-40c1-9f1b-3bd23a1116ca; q_c1=6061d5105e7144e9986c696caa21bb08|1500789237000|1492701579000; q_c1=6061d5105e7144e9986c696caa21bb08|1500789237000|1492701579000; aliyungf_tc=AQAAAKuBthrGBQkAOiAvcSpAeDQcAU+4; r_cap_id=\"ZGE2ZDIzYWRhMDA0NDM0MDgxNzI4M2ZmN2U2ODc0ZDk=|1503375703|f27506bb96ac61f5dc4eee2e6b221be5711d82db\"; cap_id=\"MmVkMzczNzg4ZjE1NDYxMTljNTg2ODk4YjliMDdiMWY=|1503375703|6f093dae6733f3d24a38941b76b0c604d1757318\"; __utma=51854390.728883040.1503302344.1503302344.1503375703.2; __utmb=51854390.0.10.1503375703; __utmc=51854390; __utmz=51854390.1503375703.2.2.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmv=51854390.000--|2=registration_date=20160504=1^3=entry_date=20170420=1; l_cap_id=\"MDRhYmNjOTI0Zjg3NDRlZjk1OWJhNDg0YWE5ZWFmZmQ=|1503375748|35ca7670e26e9aab906ea34b3f790164397aa543\"; z_c0=Mi4xWVBMREJRQUFBQUFBY0VKT1RoQ2pDeGNBQUFCaEFsVk5tajdEV1FEYnVFYVBWRmdrSFNMLURKV25LblhRQ0d2TEhn|1503375770|eabf91031457017a63bacaf49356d85485986971; unlock_ticket=\"QUpCQ19Cc2ZRZ3dYQUFBQVlRSlZUYUs0bTFrYjdxbndjamtadFJrMWFyVFFxNHRwWFctTThRPT0=|1503375770|bdb7d52666c0238036a4a7e2536c46528e294dd1\"; _xsrf=3c0570c7-fc62-47bd-a2fb-613eb2457e31");
setting = setting.addHeader("Cookie","d_c0=\"AHBCTk4QowuPTs0xoWv4_K0tdVn73ZvN2EI=|1492701580\"; _zap=9632bb9d-c70d-40c1-9f1b-3bd23a1116ca; q_c1=6061d5105e7144e9986c696caa21bb08|1506316115000|1492701579000; __DAYU_PP=mvU2ZM7RnfJeqjIYBJna2911b246d4a8; __utma=155987696.350837047.1524803761.1524803761.1524803761.1; __utmz=155987696.1524803761.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; q_c1=6061d5105e7144e9986c696caa21bb08|1525321980000|1492701579000; _xsrf=5348aed3-5d4e-404b-812e-88552e539882; tgw_l7_route=56f3b730f2eb8b75242a8095a22206f8; capsion_ticket=\"2|1:0|10:1526919135|14:capsion_ticket|44:MTM3NWY5MjFkM2YwNDAzNjljMDgxODM1ZWZhMGRlYTY=|42990fb74a71d0bbb63bb01972fcf014dca09c8efaad0eb163d024817ab21a73\"; z_c0=\"2|1:0|10:1526919283|4:z_c0|92:Mi4xUC1DVUNRQUFBQUFBY0VKT1RoQ2pDeVlBQUFCZ0FsVk5jejd3V3dEU0IzVVNJWWN4LWstb2ltUjhOZ0lmdURSVkNB|036748deda83a8431bb417b446b24dbe37a41f2bec6cab489397dcbe04fc354e\"");
setting = setting.setHttpMethod("GET");
//你需要解析的json数据格式 data->paging->next
setting = setting.setNextUrlKeyOnResult("paging->next");
Expand Down
2 changes: 1 addition & 1 deletion webBee-core/src/test/java/example/MainDemoByUrl.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public Setting getSetting() {
System.out.println("This is example.MainDemoByUrl's setting function ...");
setting = Setting.create().setStartUrl("http://www.ZhiHu.com/explore");
//添加cookie,模拟登陆 也可以选择setting.addCookie(key,value)添加cookie;
setting = setting.addHeader("Cookie","d_c0=\"AHBCTk4QowuPTs0xoWv4_K0tdVn73ZvN2EI=|1492701580\"; _zap=9632bb9d-c70d-40c1-9f1b-3bd23a1116ca; q_c1=6061d5105e7144e9986c696caa21bb08|1500789237000|1492701579000; q_c1=6061d5105e7144e9986c696caa21bb08|1500789237000|1492701579000; aliyungf_tc=AQAAAKuBthrGBQkAOiAvcSpAeDQcAU+4; r_cap_id=\"ZGE2ZDIzYWRhMDA0NDM0MDgxNzI4M2ZmN2U2ODc0ZDk=|1503375703|f27506bb96ac61f5dc4eee2e6b221be5711d82db\"; cap_id=\"MmVkMzczNzg4ZjE1NDYxMTljNTg2ODk4YjliMDdiMWY=|1503375703|6f093dae6733f3d24a38941b76b0c604d1757318\"; __utma=51854390.728883040.1503302344.1503302344.1503375703.2; __utmb=51854390.0.10.1503375703; __utmc=51854390; __utmz=51854390.1503375703.2.2.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmv=51854390.000--|2=registration_date=20160504=1^3=entry_date=20170420=1; l_cap_id=\"MDRhYmNjOTI0Zjg3NDRlZjk1OWJhNDg0YWE5ZWFmZmQ=|1503375748|35ca7670e26e9aab906ea34b3f790164397aa543\"; z_c0=Mi4xWVBMREJRQUFBQUFBY0VKT1RoQ2pDeGNBQUFCaEFsVk5tajdEV1FEYnVFYVBWRmdrSFNMLURKV25LblhRQ0d2TEhn|1503375770|eabf91031457017a63bacaf49356d85485986971; unlock_ticket=\"QUpCQ19Cc2ZRZ3dYQUFBQVlRSlZUYUs0bTFrYjdxbndjamtadFJrMWFyVFFxNHRwWFctTThRPT0=|1503375770|bdb7d52666c0238036a4a7e2536c46528e294dd1\"; _xsrf=3c0570c7-fc62-47bd-a2fb-613eb2457e31");
setting = setting.addHeader("Cookie","d_c0=\"AHBCTk4QowuPTs0xoWv4_K0tdVn73ZvN2EI=|1492701580\"; _zap=9632bb9d-c70d-40c1-9f1b-3bd23a1116ca; q_c1=6061d5105e7144e9986c696caa21bb08|1506316115000|1492701579000; __DAYU_PP=mvU2ZM7RnfJeqjIYBJna2911b246d4a8; __utma=155987696.350837047.1524803761.1524803761.1524803761.1; __utmz=155987696.1524803761.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; q_c1=6061d5105e7144e9986c696caa21bb08|1525321980000|1492701579000; _xsrf=5348aed3-5d4e-404b-812e-88552e539882; tgw_l7_route=56f3b730f2eb8b75242a8095a22206f8; capsion_ticket=\"2|1:0|10:1526919135|14:capsion_ticket|44:MTM3NWY5MjFkM2YwNDAzNjljMDgxODM1ZWZhMGRlYTY=|42990fb74a71d0bbb63bb01972fcf014dca09c8efaad0eb163d024817ab21a73\"; z_c0=\"2|1:0|10:1526919283|4:z_c0|92:Mi4xUC1DVUNRQUFBQUFBY0VKT1RoQ2pDeVlBQUFCZ0FsVk5jejd3V3dEU0IzVVNJWWN4LWstb2ltUjhOZ0lmdURSVkNB|036748deda83a8431bb417b446b24dbe37a41f2bec6cab489397dcbe04fc354e\"");
setting = setting.setDomain("zhihu.com");
setting = setting.setHttpMethod("GET");
return setting;
Expand Down

0 comments on commit 7d723d8

Please sign in to comment.