-
Notifications
You must be signed in to change notification settings - Fork 1
/
pcn_compare.ado
341 lines (267 loc) · 8.91 KB
/
pcn_compare.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
/*==================================================
project: create comparability database
Author: David L. Vargas
E-mail: [email protected]
url:
Dependencies: The World Bank
----------------------------------------------------
Creation Date: 2020-05-14
Do-file version: 01
References:
Output:
==================================================*/
/*========================================================
0: Program set up
========================================================*/
program define pcn_compare, rclass
    /*
    pcn_compare: load a "testing" dataset and a "current" dataset, merge them
    on the identifier variables and report, observation by observation,
    whether a point was dropped, added, unchanged or changed, together with
    the difference (current - testing) in the main variables.

    Options (as used by the code below):
      idvar()      identifier variables for the 1:1 merges. Default depends on
                   aggregate / wb (see section 1).
      mainv()      variables to round and difference. Default: headcount.
      server()     server passed to the command that loads the testing data.
      server0()    server passed to the command that loads the current data.
      disvar()     variables kept in the final dataset: main, diff or all.
      check()      main (keep only idvar + mainv after loading) or all.
      povline() country() region() year() fillgaps aggregate wb
                   passed through to povcalnet.
      tolerance()  number of decimal places used to round mainv.
      listc()      "yes" to list observations flagged by the SD rule.
      sdlevel()    highest SD multiple used for the flags. Default: 2.
      vintage()    if given, testing data come from pcn_production load.
      vintage0()   if given, current data come from pcn_production load.
      pause        turn pause on.

    The resulting comparison dataset is left in memory.
    */
    syntax [anything(name=subcmd id="subcommand")], ///
        [                       ///
        IDvar(string)           ///
        MAINv(string)           ///
        server(string)          ///
        server0(string)         ///
        DISvar(string)          ///
        check(string)           ///
        POVline(string)         ///
        TOLerance(integer 3)    /// decimal places
        listc(string)           ///
        SDLevel(string)         ///
        COUNtry(string)         ///
        REGion(string)          ///
        year(string)            ///
        FILLgaps                ///
        AGGregate               ///
        wb                      ///
        vintage(string)         ///
        vintage0(string)        ///
        pause                   /// declared so that the switch below can fire
        ]
    version 14

    *---------- pause
    if ("`pause'" == "pause") pause on
    else pause off

    //========================================================
    // Start
    //========================================================
    qui {

        /*================================================
        1: Check options definition and declare macros
        ==================================================*/

        // identifier variables: default depends on the kind of query
        if ("`idvar'" == "" & "`aggregate'" != "") loc idvar "year povertyline "
        else if ("`idvar'" == "" & "`wb'" != "")   loc idvar "regioncode year povertyline"
        else if ("`idvar'" == "")                  loc idvar "countrycode year povertyline coveragetype datatype"
        else loc idvar = lower("`idvar'")

        // main variables (the lowercased value must go back into mainv,
        // which is the macro every later step reads)
        if ("`mainv'" == "") loc mainv "headcount"
        else loc mainv = lower("`mainv'")

        if ("`server'" == "") loc server "AR"
        else loc server = lower("`server'")

        if ("`check'" == "") loc check "main"
        else loc check = lower("`check'")

        if ("`disvar'" == "") loc disvar "main"
        else loc disvar = lower("`disvar'")

        if ("`sdlevel'" == "") loc sdlevel = 2

        // server option for the current data is only passed when requested
        if ("`server0'" != "") loc serverm = "server(`server0')"

        if !inlist("`check'","main","all") {
            noi di as err "Check varibables must be set to: main or all"
            noi di as text "Check option forced to default"
            loc check "main"
        }

        // an invalid disvar() must reset disvar itself, not check
        if !inlist("`disvar'","diff","main","all") {
            noi di as err "disvar must be set to: main, diff or all"
            noi di as text "disvar option forced to default"
            loc disvar "main"
        }

        /*================================================
        2: Get data
        ==================================================*/

        // ---- testing data
        if ("`vintage'" == "") {
            povcalnet `wb', server(`server') povline(`povline') ///
                country(`country') region(`region')            ///
                year(`year') `fillgaps' `aggregate' clear
        }
        else {
            pcn_production load, server(`server') vintage(`vintage') clear
            // harmonise names/units only when the file has the wbcode layout
            cap rename wbcode countrycode
            if (!_rc) {
                cap gen povertyline = 1.9
                cap replace surveyyear = round(surveyyear)
                cap rename surveyyear year   // idvar uses "year"
                cap gen datatype = .
                cap gen coveragetype = .
                cap replace datatype = 1 if inlist(inc_con, "c", "C")
                cap replace datatype = 2 if inlist(inc_con, "i", "I")
                cap replace coveragetype = 2 if regexm(country, "(Urban)") & national == 0
                cap replace coveragetype = 1 if regexm(country, "(Rural)") & national == 0
                cap replace coveragetype = 3 if national == 1
                cap replace countrycode = substr(countrycode,1,3)
                replace headcount = headcount/100
                replace gini = gini/100
            }
        }

        replace povertyline=round(povertyline,.1) // some odd cases they do not quite match

        // the identifiers must be unique; otherwise leave the duplicated
        // cases in memory and stop
        cap isid `idvar'
        if _rc {
            duplicates tag `idvar', gen(duplicate)
            keep if duplicate > 0
            lab var duplicate "Number of duplicities in case"
            noi di as err "The testing server has unnexpected duplicates" char(10) as text "The process has stop, no duplicities should exist, check" char(10) as result "The data on memory contains the cases with duplicates"
            noi tab duplicate
            qui err 459
            exit
        }

        if ("`check'" == "main") {
            keep `idvar' `mainv'
        }

        * apply tolerance: build the rounding unit .0...01 with
        * `tolerance' decimal places
        local tl: disp _dup(`=`tolerance'-1') 0
        local tl = ".`tl'1"
        foreach mv of local mainv {
            replace `mv' = round(`mv', `tl')
        }

        cap replace coveragetype = 3 if coveragetype == 4 // One national

        tempfile serverd
        save `serverd'

        // ---- current data
        if ("`vintage0'" == "") {
            povcalnet `wb', povline(`povline')                  ///
                country(`country') region(`region')            ///
                year(`year') `serverm' `fillgaps' `aggregate' clear
            if ("`check'" == "main") {
                keep `idvar' `mainv'
            }
        }
        else {
            pcn_production load, `serverm' vintage(`vintage0') clear
            cap rename wbcode countrycode
            if (!_rc) {
                cap gen povertyline = 1.9
                cap replace surveyyear = round(surveyyear)
                cap rename surveyyear year   // idvar uses "year"
                cap gen datatype = .
                cap gen coveragetype = .
                cap replace datatype = 1 if inlist(inc_con, "c", "C")
                cap replace datatype = 2 if inlist(inc_con, "i", "I")
                cap replace coveragetype = 2 if regexm(country, "(Urban)") & national == 0
                cap replace coveragetype = 1 if regexm(country, "(Rural)") & national == 0
                cap replace coveragetype = 3 if national == 1
                cap replace countrycode = substr(countrycode,1,3)
                replace headcount = headcount/100
                replace gini = gini/100
            }
        }

        replace povertyline=round(povertyline,.1)
        foreach mv of local mainv {
            replace `mv' = round(`mv', `tl')
        }

        cap replace coveragetype = 3 if coveragetype == 4 // One national

        tempfile PCN
        save `PCN'

        // ---- point status
        // master = current data, using = testing data; with -update- the
        // merge indicator takes the values 1 to 5 labelled below
        merge 1:1 `idvar' using `serverd', update gen(status)
        keep `idvar' status
        lab define statusl 1 "Dropped" 2 "New point" 3 "Unchanged" 4 "Udpade from missing" 5 "Changed (conflict)"
        lab values status statusl

        preserve

        /*================================================
        3: Trace back changes
        ==================================================*/

        *keep if inlist(status,3,4,5)
        merge 1:1 `idvar' using `serverd', /*keep(match)*/ nogen

        // rename every numeric non-id variable of the testing data to
        // test_<var>; string non-id variables are dropped
        loc vlist
        loc vlistt
        loc tvlist
        foreach var of varlist _all {
            // exact membership test (a substring match would wrongly treat
            // a variable whose name is contained in an id name as an id)
            local is_id : list var in idvar
            if (!`is_id' & "`var'" != "status") {
                cap confirm string var `var'
                if _rc {
                    loc vlab: var label `var'
                    rename `var' test_`var'
                    lab var test_`var' "Testing: `vlab'"
                    loc vlist  "`vlist' `var'"
                    loc vlistt "`vlistt' test_`var'"
                    loc tvlist "`tvlist' `var' test_`var'"
                }
                else {
                    drop `var'
                }
            }
        }

        // bring in the current values under their original names
        merge 1:1 `idvar' using `PCN', /*keep(match)*/ nogen
        keep `idvar' status `vlist' `vlistt'

        // difference in main values (current - testing)
        if ("`mainv'" == "all") loc mainv "`vlist'"
        loc dvars
        loc mcall
        foreach var of local mainv {
            cap confirm var `var'
            if (_rc == 0) {
                gen d_`var' = `var' - test_`var'
                lab var d_`var' "difference in `var'"
                loc dvars "`dvars' d_`var'"
                loc mcall "`mcall' `var' test_`var'"
            }
        }

        tempfile changes
        save `changes'

        restore

        // join to get the final dataset
        merge 1:1 `idvar' using `changes', nogen
        order `idvar' status `dvars' `tvlist'

        if ("`disvar'" != "all") {
            if ("`disvar'" == "diff") {
                keep `idvar' status `dvars'
            }
            if ("`disvar'" == "main") {
                keep `idvar' status `dvars' `mcall'
            }
        }

        /*================================================
        4: Report results and return values
        ==================================================*/

        // report back to user
        noi di as text "The status of observations is as follows:"
        noi tab status

        // list of problematic obs
        if (lower("`listc'")=="yes") {

            // the flags below are computed within regioncode, which is only
            // in memory when it is one of the id variables
            cap confirm var regioncode
            if _rc {
                noi di as err "listc(yes) requires variable regioncode in the comparison data"
                error 111
            }

            if ("`wb'"!="") loc idc = "regioncode"
            else loc idc = "countrycode"

            tempvar obsid
            egen `obsid' = concat(`idc' year), p(-)
            lab var `obsid' "Country-year"

            // flag observations whose absolute difference exceeds the
            // regional mean by x standard deviations, x = 1..sdlevel;
            // collect the flag names while the loop variable is defined
            loc vars
            foreach var of local mainv {
                foreach v in mn_d_`var' sd_d_`var' {
                    cap drop `v'
                }
                bysort regioncode: egen mn_d_`var' = mean(d_`var')
                bysort regioncode: egen sd_d_`var' = sd(d_`var')
                forv x = 1/`sdlevel' {
                    // higher than variables
                    cap drop ht_`x'sd_`var'
                    gen ht_`x'sd_`var' = abs(d_`var') > (mn_d_`var' + `x'*sd_d_`var') if d_`var' != .
                    tab ht_`x'sd_`var'
                    lab var ht_`x'sd_`var' "Higher than `x' SD from mean"
                    loc vars "`vars' ht_`x'sd_`var'"
                }
            }

            levelsof regioncode, local(regions)

            foreach vh of local vars {
                local lab: variable label `vh'
                foreach rg of local regions {
                    noi di "List of problems `rg'"
                    noi di "`lab'"
                    noi tab `obsid' if regioncode == "`rg'" & `vh' == 1
                }
            }

            noi di as result "Comparison data load into memory"
        }

    } // end qui

end
exit
/* End of do-file */
><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
Notes:
1.
2.
3.
Version Control: