Skip to content

Commit

Permalink
特殊城市或区县处理
Browse files Browse the repository at this point in the history
1. 中山市、东莞市、儋州市没有县级行政区划
2. 嘉峪关市下有一个县级行政区划叫市辖区(code: 620201),
   因此也视为没有县级行政区划,但是 code 需要保留处理。
3. 三沙市下有县级行政区划,但是在 “最新县及县以上行政区划代码” 中
   没有,因此需要手动加上。
4. 福建省泉州市金门县没有乡镇级行政区划
  • Loading branch information
modood committed Aug 11, 2017
1 parent bc2e6ed commit 5b44d16
Show file tree
Hide file tree
Showing 10 changed files with 1,274 additions and 39 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@
```
$ npm install
$ node output.js
$ npm run build
```

![preview](preview.png)

> 提示:
>
> 由于抓取乡镇数据为异步操作,每次输出的乡镇数据顺序可能不同
>
> 但是不会影响其它数据的顺序以及所有数据的完整性。
## TEST
Expand Down
2 changes: 1 addition & 1 deletion dist/address3.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/address4.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/areas.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/streets.json

Large diffs are not rendered by default.

15 changes: 13 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function outputJSON () {
console.log('[1/3] 正在生成 “省份、城市” 二级联动数据...')
var pc = getAddressPC(provinces, cities)
console.log('[2/3] 正在生成 “省份、城市、区县” 三级联动数据...')
var pca = getAddressPCA(provinces, cities, areas)
var pca = getAddressPCA(provinces, cities, areas, streets)
console.log('[3/3] 正在生成 “省份、城市、区县、乡镇” 四级联动数据...')
var pcas = getAddressPCAS(provinces, cities, areas, streets)

Expand Down Expand Up @@ -90,7 +90,7 @@ function getAddressPC (provinces, cities) {
* @Author https://github.com/modood
* @DateTime 2016-10-09 16:00
*/
function getAddressPCA (provinces, cities, areas) {
function getAddressPCA (provinces, cities, areas, streets) {
var doc = {}

provinces.forEach(function (p) {
Expand All @@ -99,6 +99,15 @@ function getAddressPCA (provinces, cities, areas) {
cities.filter(function (c) {
return p.code === c.parent_code
}).forEach(function (c) {
// 特殊城市单独处理(中山市、东莞市、儋州市和嘉峪关市没有县级行政区划)
if (['441900', '442000', '460400', '620200'].indexOf(c.code) !== -1) {
doc[p.name][c.name] = streets.filter(function (s) {
return (c.code === '620200' ? '620201' : c.code) === s.parent_code
}).map(function (s) {
return s.name
})
return
}
doc[p.name][c.name] = areas.filter(function (a) {
return c.code === a.parent_code
}).map(function (a) {
Expand Down Expand Up @@ -137,6 +146,8 @@ function getAddressPCAS (provinces, cities, areas, streets) {
})
})
})
// 特殊区县单独处理(福建省泉州市金门县没有乡镇级行政区划)
doc['福建省']['泉州市']['金门县'].push('金门县')

return doc
}
Expand Down
9 changes: 5 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"name": "china-division",
"version": "1.1.0",
"version": "1.1.1",
"description": "中华人民共和国行政区划:省份、城市、区县、乡镇(街道)",
"main": "index.js",
"scripts": {
"test": "eslint . && mocha -t 5000",
"build": "node output.js",
"precommit": "npm test"
},
"repository": {
Expand Down Expand Up @@ -34,10 +35,10 @@
"iconv-lite": "^0.4.15"
},
"devDependencies": {
"eslint": "^3.19.0",
"eslint": "^4.4.1",
"eslint-config-standard": "^10.2.1",
"eslint-plugin-import": "^2.3.0",
"eslint-plugin-node": "^4.2.2",
"eslint-plugin-import": "^2.7.0",
"eslint-plugin-node": "^5.1.1",
"eslint-plugin-promise": "^3.5.0",
"eslint-plugin-standard": "^3.0.1",
"husky": "^0.13.4",
Expand Down
70 changes: 44 additions & 26 deletions spider.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ var async = require('async')
var iconv = require('iconv-lite')
var BufferHelper = require('bufferhelper')

var i = 0
var al = 0
var ai = 0

/**
* 从国家统计局(http://www.stats.gov.cn/)抓取县级以及县级以上行政区划数据
* @author modood <https://github.com/modood>
* @datetime 2016-12-19 16:32
*/
function fetch (callback) {
// 数据截止 2016 年 07 月 31 日(发布时间:2017-03-10 10:33)
// 数据截止 2016 年 07 月 31 日(发布时间:2017-03-10 10:33)
http.get('http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201703/t20170310_1471429.html', function (res) {
var rawData = ''
var statusCode = res.statusCode
Expand Down Expand Up @@ -42,24 +43,24 @@ function fetch (callback) {
}
return callback(null, result)
})
})
.on('error', callback)
}).on('error', callback)
}

/**
* 从国家统计局(http://www.stats.gov.cn/)抓取城乡行政区划数据
* @author modood <https://github.com/modood>
* @datetime 2016-12-19 16:35
*/
function fetchStreets (area, total, callback) {
function fetchStreets (area, callback) {
var html = ''
var areaCode = area.code
var areaName = area.name

// 两个特殊城市单独处理(中山市和东莞市没有县级行政区划
// 特殊城市单独处理(中山市、东莞市、儋州市没有县级行政区划)
switch (areaCode) {
case '441900': html = '44/4419.html'; break
case '442000': html = '44/4420.html'; break
case '460400': html = '46/4604.html'; break
default: html = areaCode.substr(0, 2) + '/' + areaCode.substr(2, 2) + '/' + areaCode + '.html'
}

Expand All @@ -68,11 +69,25 @@ function fetchStreets (area, total, callback) {
var bufferHelper = new BufferHelper()
var statusCode = res.statusCode

if (['441900', '442000'].indexOf(areaCode) === -1) {
console.log('[' + ++i + '/' + total + '] 正在抓取乡镇数据,当前区县:', areaCode, areaName)
if (statusCode !== 200 && statusCode !== 404) {
res.resume()
return fetchStreets(area, callback)
}

if (statusCode !== 200) {
// 特殊城市或区县抓取乡镇数据不打印输出
if ([
'441900', // 东莞市
'442000', // 中山市
'460400', // 儋州市
'460321', // 三沙市-西沙群岛
'460322', // 三沙市-南沙群岛
'460323', // 三沙市-中沙群岛的岛礁及其海域
'620201' // 嘉峪关市
].indexOf(areaCode) === -1) {
console.log('[' + ++ai + '/' + al + '] 正在抓取乡镇数据,当前区县:', areaCode, areaName)
}

if (statusCode === 404) {
res.resume()
return callback(null, {})
}
Expand All @@ -85,17 +100,16 @@ function fetchStreets (area, total, callback) {
var rawData = iconv.decode(bufferHelper.toBuffer(), 'GBK')
var current
var result = {}
var reg = /<tr class='towntr'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>(.*?)<\/a><\/td><\/tr>/g
var reg = /<tr class='.*?'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>(.*?)<\/a><\/td><\/tr>/g

while ((current = reg.exec(rawData)) !== null) {
result[current[1]] = current[2].trim()
}
return callback(null, result)
})
})
.on('error', function () {
}).on('error', function () {
console.log('连接超时,马上重试...')
fetchStreets(area, total, callback)
fetchStreets(area, callback)
})
}

Expand Down Expand Up @@ -153,7 +167,8 @@ function pickStreets (areas, callback) {
var streets = []

async.mapLimit(areas, 10, function (item, cb) {
fetchStreets(item, areas.length, function (err, data) {
al = areas.length
fetchStreets(item, function (err, data) {
if (err) return cb(err)

for (var k in data) {
Expand All @@ -173,27 +188,30 @@ function pickStreets (areas, callback) {
}

/**
* 两个特殊城市单独处理(中山市和东莞市没有县级行政区划)
* 特殊城市单独处理
* @author modood <https://github.com/modood>
* @datetime 2016-12-20 15:11
*/
function handleSpecialCities (callback) {
// 1. 中山市、东莞市、儋州市没有县级行政区划
// 2. 嘉峪关市下有一个县级行政区划叫市辖区(code: 620201),
// 因此也视为没有县级行政区划,但是 code 需要保留处理。
// 3. 三沙市下有县级行政区划,但是在 “最新县及县以上行政区划代码” 中
// 没有,因此需要手动加上。
// 4. 福建省泉州市金门县没有乡镇级行政区划
var areas = [
{
code: '442000',
name: '中山市',
parent_code: '442000'
},
{
code: '441900',
name: '东莞市',
parent_code: '441900'
}
{ code: '442000', name: '中山市', parent_code: '442000' },
{ code: '441900', name: '东莞市', parent_code: '441900' },
{ code: '460400', name: '儋州市', parent_code: '460400' },
{ code: '620201', name: '嘉峪关市', parent_code: '620200' },
{ code: '460321', name: '西沙群岛', parent_code: '460300' },
{ code: '460322', name: '南沙群岛', parent_code: '460300' },
{ code: '460323', name: '中沙群岛的岛礁及其海域', parent_code: '460300' }
]
var streets = []

async.each(areas, function (area, cb) {
fetchStreets(area, areas.length, function (err, data) {
fetchStreets(area, function (err, data) {
if (err) return cb(err)

for (var k in data) {
Expand Down
32 changes: 32 additions & 0 deletions test/json.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,46 @@ describe('中华人民共和国行政区划:', function () {

it('“省份、城市” 二级联动数据', function () {
  // Spot check: a known city must be listed under its province.
  var hangzhouIndex = address2['浙江省'].indexOf('杭州市')
  assert.ok(hangzhouIndex !== -1)

  // These entries are exempt from the "city list must not be empty" check.
  var exempt = ['台湾省', '香港特别行政区', '澳门特别行政区']

  for (var province in address2) {
    if (exempt.indexOf(province) !== -1) continue
    if (address2[province].length === 0) {
      throw new Error(province + '的城市列表为空')
    }
  }
})

it('“省份、城市、区县” 三级联动数据', function () {
  // Spot checks, each written as [province, city, expected entry].
  // The last four rows cover the cities this test expects to list
  // town-level names directly under the city.
  var samples = [
    ['云南省', '丽江市', '古城区'],
    ['海南省', '三沙市', '南沙群岛'],
    ['广东省', '中山市', '横栏镇'],
    ['广东省', '东莞市', '常平镇'],
    ['海南省', '儋州市', '那大镇'],
    ['甘肃省', '嘉峪关市', '文殊镇']
  ]

  samples.forEach(function (sample) {
    var entries = address3[sample[0]][sample[1]]
    assert.ok(entries.indexOf(sample[2]) !== -1)
  })

  // No city anywhere in the data set may have an empty list.
  for (var province in address3) {
    var cities = address3[province]
    for (var city in cities) {
      if (cities[city].length === 0) {
        throw new Error(province + city + '的区县列表为空')
      }
    }
  }
})

it('“省份、城市、区县、乡镇” 四级联动数据', function () {
  // Spot checks, each written as [province, city, area, expected entry].
  var samples = [
    ['广西壮族自治区', '玉林市', '容县', '石头镇'],
    ['海南省', '三沙市', '南沙群岛', '永暑岛'],
    ['广东省', '中山市', '中山市', '横栏镇'],
    ['广东省', '东莞市', '东莞市', '常平镇'],
    ['海南省', '儋州市', '儋州市', '那大镇'],
    ['甘肃省', '嘉峪关市', '嘉峪关市', '文殊镇'],
    ['福建省', '泉州市', '金门县', '金门县']
  ]

  samples.forEach(function (sample) {
    var entries = address4[sample[0]][sample[1]][sample[2]]
    assert.ok(entries.indexOf(sample[3]) !== -1)
  })

  // No area anywhere in the data set may have an empty list.
  for (var province in address4) {
    var cities = address4[province]
    for (var city in cities) {
      var areas = cities[city]
      for (var area in areas) {
        if (areas[area].length === 0) {
          throw new Error(province + city + area + '的乡镇列表为空')
        }
      }
    }
  }
})

it('“某省、某市、某县、某镇” 不存在', function () {
Expand Down
Loading

0 comments on commit 5b44d16

Please sign in to comment.