Skip to content

Commit

Permalink
fix #7 #19 add String Json return API and add function to realize use…
Browse files Browse the repository at this point in the history
…r custom return json key's value like 'data->name'

fix #7 #19 add String Json return API and add function to realize user
custom return json key's value like 'data->name'
  • Loading branch information
nonacosa committed Apr 23, 2017
1 parent 43a9361 commit ea60947
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 46 deletions.
11 changes: 7 additions & 4 deletions webBee-core/src/main/java/org/bee/webBee/Bee.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.bee.webBee.linker.Request;
import org.bee.webBee.processor.Setting;
import org.bee.webBee.processor.Task;
import org.bee.webBee.utils.JsonUtil;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -65,7 +66,7 @@ public void run() {
COUNT++;
System.out.println("this is Bee.class implement Runnable's run function! --request:" + request.toString());
try {
Thread.sleep(1000);
Thread.sleep(5000);
pageProcessor.process(pageProcessor(request));
System.out.println(" ");
} catch (IOException e) {
Expand Down Expand Up @@ -100,8 +101,8 @@ public void requestProcessor(){
* TODO: the "paging" / "next" keys must not be hard-coded.
*/
private void requestNextProcessor() {
// Replaces this.request with a new Request built from the next-page URL found in the
// current API response, and prints that URL.
// NOTE(review): two variants of the same work appear back to back below; this looks like
// before/after residue of a change — confirm which pair the real file keeps.
// Variant 1: reads the raw response text and walks the hard-coded "paging" object.
this.request = new Request(((JSONObject)((JSONObject) JSON.parse(html.getApi())).get("paging")).get("next").toString());
System.out.println("nextUrl--->"+ (((JSONObject)((JSONObject) JSON.parse(html.getApi())).get("paging")).get(setting.getNextUrlKeyOnResult()).toString()));
// Variant 2: resolves the key path returned by setting.getNextUrlKeyOnResult() through JsonUtil.
this.request = new Request(JsonUtil.jsonCustomKey(html.getJsonApi(),setting.getNextUrlKeyOnResult()));
System.out.println("nextUrl--->"+ JsonUtil.jsonCustomKey(html.getJsonApi(),setting.getNextUrlKeyOnResult()));
}

/**
Expand All @@ -110,7 +111,9 @@ private void requestNextProcessor() {
* TODO: the "data" key should not be fixed here; could this check be left to the user instead?
*/
private boolean checkResultData() {
// Reports whether the "data" array of the current API response contains at least one element.
// NOTE(review): two return statements follow; the first reads the raw text via getApi(),
// the second uses getJsonApi(). This looks like before/after residue of a change —
// confirm which one the real file keeps.
return JSON.parseArray(((JSONObject) JSON.parse(html.getApi())).get("data").toString()).size()>0;

return JSON.parseArray(html.getJsonApi().get("data").toString()).size()>0;

}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.bee.webBee.download;

import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.util.EntityUtils;
import org.bee.webBee.HttpClient.HttpClientPool;
import org.bee.webBee.html.Html;
import org.bee.webBee.linker.Page;
Expand Down Expand Up @@ -33,7 +34,9 @@ public Page download(Request request, Task task) {
CloseableHttpClient closeableHttpClient = httpClientBuilder.build();
try {
CloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(httpMethod);
System.out.println(closeableHttpResponse.getStatusLine().getStatusCode());
page.setHtml(closeableHttpResponse);
page.setRequest(request);
task.setHtml(page.getHtml()); //自传递api到Bee处理器
closeableHttpResponse.close();
// TODO: handle exceptions with a do-while (retry) strategy
Expand Down
8 changes: 7 additions & 1 deletion webBee-core/src/main/java/org/bee/webBee/html/Html.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package org.bee.webBee.html;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.util.EntityUtils;
import org.bee.webBee.utils.ElementUtil;
import org.bee.webBee.utils.JsonUtil;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;

Expand Down Expand Up @@ -65,10 +67,14 @@ public Html getDocument() {
return new Html("");
}

// Returns the document text held by this instance, unparsed.
// NOTE(review): two signature lines follow (getApi, then getStringApi); this looks like
// before/after residue of a rename — confirm which name the real file keeps.
public String getApi(){
public String getStringApi(){
return document;
}

/**
 * Parses the document text as JSON and returns the result as a {@link JSONObject}.
 *
 * @return the parsed document
 * @throws ClassCastException if the parsed value is not a JSON object
 */
public JSONObject getJsonApi(){
    final Object parsed = JSON.parse(document);
    return (JSONObject) parsed;
}

@Override
public Html $(String selector) {
this.elements = Jsoup.parse(document).select(selector);
Expand Down
12 changes: 11 additions & 1 deletion webBee-core/src/main/java/org/bee/webBee/linker/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public void addWaitRequest(List<String> requests){

/**
 * Builds an {@link Html} from the given HTTP response and stores, in this order, the Html
 * returned by {@code getDocument()}, its string form, and the response object itself.
 *
 * @param closeableHttpClient the HTTP response to wrap (a response, despite the parameter name)
 * @throws IOException declared by the signature; presumably raised while the response is
 *         read into the Html — confirm against the Html constructor
 */
public void setHtml(CloseableHttpResponse closeableHttpClient) throws IOException {
    final Html parsedDocument = new Html(closeableHttpClient).getDocument();
    this.html = parsedDocument;
    this.api = parsedDocument.getStringApi();
    this.closeableHttpClient = closeableHttpClient;
}

Expand All @@ -54,6 +56,14 @@ public Html getHtml(){

// Returns the API response text associated with this page.
// NOTE(review): two return statements follow; the first delegates to html.getApi(), the
// second returns the api field. This looks like before/after residue of a change —
// confirm which one the real file keeps.
public String getApi() {

return html.getApi();
return api;
}

/**
 * Returns the request currently stored on this page.
 *
 * @return the stored request, or {@code null} if none has been set
 */
public Request getRequest() {
    return this.request;
}

/**
 * Stores the given request on this page, replacing any previous one.
 *
 * @param request the request to associate with this page
 */
public void setRequest(final Request request) {
    this.request = request;
}
}
17 changes: 16 additions & 1 deletion webBee-core/src/main/java/org/bee/webBee/utils/JsonUtil.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
package org.bee.webBee.utils;

import com.alibaba.fastjson.JSONObject;

/**
* date 2017-04-01 17:42
* E-mail [email protected]
* @author sis.nonacosa
*/
public interface JsonUtil {
public class JsonUtil {

    /** Separator between the segments of a custom key path, e.g. {@code data->name}. */
    private static final String PATH_SEPARATOR = "->";

    /**
     * Resolves a {@code "->"}-separated key path against a JSON object and returns the value
     * at the end of the path as a string.
     *
     * <p>For example, the path {@code "paging->next"} returns
     * {@code json.get("paging").get("next").toString()}.
     *
     * <p>Unresolvable paths yield {@code null} instead of an exception. That covers a
     * {@code null} argument, a path with no segments, a missing key, and an intermediate
     * value that is not a JSON object.
     *
     * @param json the JSON object to search; may be {@code null}
     * @param s the key path, segments separated by {@code "->"}; may be {@code null}
     * @return the string form of the value at the path, or {@code null} if it cannot be resolved
     */
    public static String jsonCustomKey (JSONObject json, String s) {
        if (json == null || s == null) {
            return null;
        }
        String[] keys = s.split(PATH_SEPARATOR);
        if (keys.length == 0) {
            // split() drops trailing empty segments, so a path such as "->" has no keys at all.
            return null;
        }
        Object current = json;
        for (String key : keys) {
            if (!(current instanceof JSONObject)) {
                // Either the previous key was missing or its value cannot be descended into.
                return null;
            }
            current = ((JSONObject) current).get(key);
        }
        return current == null ? null : current.toString();
    }
}
6 changes: 0 additions & 6 deletions webBee-redis/src/main/java/webbee/redis/RedisHash.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package webbee.redis;

import org.bee.webBee.DataStoreBase;
import org.bee.webBee.DataStoreKV;

import java.util.Map;

Expand All @@ -12,11 +11,6 @@
*/
public class RedisHash implements DataStoreBase {

public static String set(String key, Map<String,String> hash){
return Redis.create().hmset(key,hash);

}


public boolean insert(String key, Map<String,String> value) {
return Redis.create().hmset(key, value).equals("OK");
Expand Down
37 changes: 37 additions & 0 deletions webBee-redis/src/main/java/webbee/redis/RedisSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package webbee.redis;

import org.bee.webBee.DataStoreBase;
import redis.clients.jedis.Jedis;

import java.util.Map;

/**
* date 2017-04-22 00:19
* E-mail [email protected]
* @author sis.nonacosa
*/
public class RedisSet implements DataStoreBase {

    /** Jedis handle obtained from {@code Redis.create()} when this instance is constructed. */
    private final Jedis jedis = Redis.create();

    /**
     * Creates a new {@code RedisSet}.
     *
     * @return a fresh instance
     */
    public static RedisSet create(){
        return new RedisSet();
    }

    /**
     * Adds {@code value} to the Redis set stored at {@code key}.
     *
     * @param key the name of the set
     * @param value the member to add
     * @return {@code true} when SADD reports exactly one added member, {@code false} otherwise
     */
    public boolean insert(String key, String value) {
        final Long added = jedis.sadd(key, value);
        return added.equals(1L);
    }

    /**
     * Not implemented.
     *
     * @param value ignored
     * @return always {@code null}
     */
    @Override
    public Object insert(Object value) {
        return null;
    }

    /**
     * Not implemented.
     *
     * @return always {@code null}
     */
    @Override
    public Object select() {
        return null;
    }
}
33 changes: 0 additions & 33 deletions webBee-redis/src/test/java/TestHash.java

This file was deleted.

109 changes: 109 additions & 0 deletions webBee-redis/src/test/java/TestRedis.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/**
* Created by zhuang on 2017/4/22.
*/

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.bee.webBee.Bee;
import org.bee.webBee.DataStoreBase;
import org.bee.webBee.linker.Page;
import org.bee.webBee.processor.PageProcessor;
import org.bee.webBee.processor.Setting;
import redis.clients.jedis.Jedis;
import webbee.redis.Redis;
import webbee.redis.RedisHash;
import webbee.redis.RedisSet;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
* date 2017-04-22 00:27
* E-mail [email protected]
*
* @author sis.nonacosa
*/
public class TestRedis implements PageProcessor {

    /** Redis set that receives the string form of every crawled record. */
    private static final String USER_SET_KEY = "zhihu_user_yet";

    private Setting setting;

    /** Number of pages processed so far; only used to label the console output. */
    private int count = 0;

    /** Store backed by a Redis set. */
    private final RedisSet redisSet = new RedisSet();

    /**
     * Prints the page's API response, then adds the string form of every element of its
     * {@code "data"} array to the Redis set {@code zhihu_user_yet}.
     *
     * @param page the downloaded page whose API text is processed
     * @throws IOException declared by {@link PageProcessor#process}; not thrown directly here
     */
    @Override
    public void process(Page page) throws IOException {
        String api = page.getApi();
        // Parse once and reuse the result for both printing and extraction.
        Object parsed = JSON.parse(api);

        System.out.println(count + " : ---api result:");
        System.out.println(parsed);
        count++;

        // Elements are only ever rendered with toString(), so no element type is assumed;
        // values inside a record are not guaranteed to be strings.
        List<?> records = (List<?>) ((JSONObject) parsed).get("data");
        for (Object record : records) {
            String text = record.toString();
            System.out.println("list" + text);
            redisSet.insert(USER_SET_KEY, text);
        }
    }

    /**
     * Builds the crawl settings: start URL, cookies, domain, request headers, HTTP method and
     * the key path of the next-page URL inside each response.
     *
     * @return the configured settings, also stored in the {@code setting} field
     */
    @Override
    public Setting getSetting() {
        System.out.println("This is MainDemoByUrl's setting function ...");
        setting = Setting.create().setStartUrl("https://www.zhihu.com/api/v4/members/wangnuonuo/followers?include=data%5B*%5D.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics&offset=30020&limit=20");
        // NOTE(review): session cookies and tokens are hard-coded below. They should be loaded
        // from local configuration or the environment, and the committed values revoked.
        setting = setting.setCookies("z_c0", "Mi4wQUFEQUZwSzEzZ2tBY01ERVdCM0lDUmNBQUFCaEFsVk5WQjcyV0FEb0RnYlI3QnFQSWtzSWMzTjRHQVN0YlNlTW1R");
        setting = setting.setCookies("_xsrf", "8e8eedb720402d12bce9b5e611837b6d");
        setting = setting.setDomain("zhihu.com");
        setting = setting.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        setting = setting.addHeader("Accept-Encoding", "gzip, deflate, sdch, br");
        setting = setting.addHeader("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4");
        setting = setting.addHeader("Cache-Control", "max-age=0");
        setting = setting.addHeader("Connection", "keep-alive");
        setting = setting.addHeader("Host", "www.zhihu.com");
        setting = setting.addHeader("Upgrade-Insecure-Requests", "1");
        setting = setting.addHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
        setting = setting.addHeader("Cookie", "aliyungf_tc=AQAAAOeQ+1+bMQcAuTj1eyWWHEWGOFd5; acw_tc=AQAAALOd4Xn1TwcAuTj1e01cA+S2FFI9; q_c1=6061d5105e7144e9986c696caa21bb08|1492701579000|1492701579000; r_cap_id=\"NjBhNDM0ZmQzYTEyNGMxNWIxNDA1MjAwYmUwMDQzODY=|1492701579|48d9e12537fe7f83ad3bcaf9ed9b50c7fc995db2\"; cap_id=\"MDAxNWRlZjA5ZTRmNGNiOWE4OWZjMjBhYmRhZTUwMzg=|1492701579|49943d3f3bd8f8d52c70264684674bb22645946d\"; _xsrf=880dce32ffb2deb6467caadd8299d352; d_c0=\"AHBCTk4QowuPTs0xoWv4_K0tdVn73ZvN2EI=|1492701580\"; _zap=9632bb9d-c70d-40c1-9f1b-3bd23a1116ca; l_n_c=1; __utma=51854390.1545696888.1492701581.1492701581.1492701581.1; __utmb=51854390.0.10.1492701581; __utmc=51854390; __utmz=51854390.1492701581.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.100--|2=registration_date=20170416=1^3=entry_date=20170416=1; s-q=%E7%8E%8B%E8%AF%BA%E8%AF%BA; s-i=3; sid=0j69lkf8; s-t=autocomplete; z_c0=Mi4wQUJEQ0JOcEVuUXNBY0VKT1RoQ2pDeGNBQUFCaEFsVk43VjhnV1FCLUVaUDM1NWhXemRMalIwWXNhdE4wZi1tanJn|1492703100|d381cbe3d541399a2b1518a256b9d5a5935f9087");
        setting = setting.setHttpMethod("GET");
        // Key path handed to the crawler for locating the next-page URL in each response.
        setting = setting.setNextUrlKeyOnResult("paging->next");
        return setting;
    }

    /**
     * Runs the crawler with this processor.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Bee.create(new TestRedis()).run();
    }
}

0 comments on commit ea60947

Please sign in to comment.