* Authors:
* Chuntao Li, Ph.D. , China Stata Club(爬虫俱乐部)(chtl@hust.edu.cn)
* Xueren Zhang, China Stata Club(爬虫俱乐部)(zhijunzhang_hi@163.com)
* Yuan Xue, China Stata Club(爬虫俱乐部)(xueyuan@hust.com)
* December 4th, 2018
* Program written by Dr. Chuntao Li, Xueren Zhang and Yuan Xue
* Used to get information about a given keyword location within a certain range of a place from Baidu Map API
* and can only be used in Stata version 14.0 or above
* Original Data Source: http://api.map.baidu.com
* Please do not use this code for commercial purposes

* cnmapsearch: for every observation, query the Baidu place-search endpoint
* around (latitude, longitude) and attach the places found to the data in
* long form (one row per centre point x place).
*
* Options (see the -syntax- statement):
*   baidukey(string)   API key, sent as the "ak" URL parameter
*   latitude(string)   name of the variable holding the centre latitude
*   longitude(string)  name of the variable holding the centre longitude
*   keyword(string)    search term, sent as the "query" URL parameter
*   radius(real)       search radius sent to the API, default 2000
*   filter(type [sort]) type must be cater, life or hotel; the optional sort
*                      key is validated against a per-type list below
*   seefilter          keep the <prefix>filter_type / <prefix>sort_name columns
*   sample             fetch only the first result page
*   prefix(string)     prefix for the created result variables
*   result(list)       fields to extract; default
*                      "name address telephone tag distance lat lng"
*
* Creates centerid (row number of the centre point) and locid (running
* number of the place within a centre point). lat/lng results are stored as
* <prefix>loc_lat / <prefix>loc_lng.
*
* NOTE(review): the XML tag literals used while parsing the response
* ("<status>2</status>", "<results>", "</results>", "</result>" and the
* closing "</field>" in the extraction regex) were empty or truncated in the
* reviewed copy of this file. They are reconstructed here from the Baidu
* Place API XML layout and should be confirmed against the upstream source.
program define cnmapsearch
    if _caller() < 14.0 {
        disp as error "this is version `=_caller()' of Stata; it cannot run version 14.0 programs"
        exit 9
    }
    syntax, baidukey(string) LATitude(string) LONGitude(string) KEYword(string) ///
        [RADius(real 2000) FILter(string asis) seefilter sample prefix(string) ///
        result(string asis)]

    qui {
        tempvar baidumap
        tempname poster piece
        tempfile hits

        * ---- validate filter() and store it in two helper columns ----------
        if `"`filter'"' == "" {
            gen `prefix'filter_type = ""
            gen `prefix'sort_name = ""
        }
        else {
            mata filter_token(`"`filter'"')
            if `filter_error' == 1 {
                disp as error "more than 2 parts in the option filter()"
                exit 198
            }
            if !inlist("`filter_type'", "cater", "life", "hotel") {
                disp as error "you specify wrong category in the option filter()"
                exit 198
            }
            if ("`filter_type'" == "cater" & !inlist("`filter_sort'", "distance", "price", "overall_rating", "taste_rating", "service_rating", "")) ///
                | ("`filter_type'" == "life" & !inlist("`filter_sort'", "distance", "price", "overall_rating", "comment_rating", "")) ///
                | ("`filter_type'" == "hotel" & !inlist("`filter_sort'", "distance", "price", "total_score", "level", "health_score", "")) {
                disp as error "you specify wrong preference in the option filter()"
                exit 198
            }
            gen `prefix'filter_type = "`filter_type'"
            gen `prefix'sort_name = "`filter_sort'"
        }

        * ---- validate result(); sets result_num and result_1 ... result_k --
        if `"`result'"' == "" local result = `"name address telephone tag distance lat lng"'
        mata result_token(`"`result'"')
        if `result_error' == 1 {
            disp as error "you specify the option result() wrongly"
            exit 198
        }

        gen centerid = _n
        gen validsearch = 1
        gen `baidumap' = ""
        qui sum validsearch
        local tmpsum = `r(sum)'
        local pagenum 0

        * One posting file for all pages, kept in Stata's temporary directory
        * so that no file in the working directory is written or erased.
        local postvars "centerid"
        forvalues r_i = 1/`result_num' {
            local postvars "`postvars' str150 `prefix'`result_`r_i''"
        }
        postfile `poster' `postvars' using `"`hits'"', replace

        * ---- fetch page after page until no centre point has results left --
        while `tmpsum' != 0 {
            forvalues i = 1/`=_N' {
                if (`=validsearch[`i']' == 1) {
                    local url `"http://api.map.baidu.com/place/v2/search?query=`keyword'&page_size=10&page_num=`pagenum'&scope=2&location=`=`latitude'[`i']',`=`longitude'[`i']'&radius=`radius'&filter=industry_type:`=`prefix'filter_type[1]'|sort_name:`=`prefix'sort_name[1]'&output=xml&ak=`baidukey'"'
                    replace `baidumap' = fileread(`"`url'"') in `i'
                    local times = 0
                    while filereaderror(`baidumap'[`i']) != 0 {
                        sleep 1000
                        local times = `times' + 1
                        replace `baidumap' = fileread(`"`url'"') in `i'
                        * give up only if the read is still failing
                        if `times' > 10 & filereaderror(`baidumap'[`i']) != 0 {
                            local rc = filereaderror(`baidumap'[`i'])
                            postclose `poster'
                            noi disp as error "Internet speeds is too low to get the data"
                            exit `rc'
                        }
                    }
                    if index(`baidumap'[`i'], "AK有误请检查再重试") {
                        postclose `poster'
                        noisily di as error "error: please check your baidukey"
                        exit 198
                    }
                    else if index(`baidumap'[`i'], "<status>2</status>") {
                        * presumably status 2 means an invalid request
                        * parameter -- TODO confirm against the API docs
                        noisily di in red "error: please check your location in `i'"
                        continue
                    }
                }
                else {
                    replace `baidumap' = "" in `i'
                }
            }

            * Strip all whitespace, keep only the <results> ... </results>
            * section and cut it into one piece per <result> element.
            replace `baidumap' = ustrregexra(`baidumap', "\s*", "")
            replace `baidumap' = substr(`baidumap', index(`baidumap', "<results>"), .)
            replace `baidumap' = substr(`baidumap', 1, index(`baidumap', "</results>") - 1)
            cap split `baidumap', p(`"</result>"') gen("`piece'")
            local nvars = r(nvars)
            * nothing was split off (e.g. every response was empty)
            if missing(`nvars') local nvars 0

            forvalues var_i = 1/`nvars' {
                forvalues r_i = 1/`result_num' {
                    gen `prefix'`result_`r_i''`var_i' = ustrregexs(1) if ustrregexm(`piece'`var_i', "<`result_`r_i''>(.*?)</`result_`r_i''>")
                }
            }

            * A centre point is queried again only if it filled the last slot
            * of this page. The test reads the raw piece so that it also works
            * when "name" is not part of result().
            if ("`sample'" != "") | (`nvars' == 0) {
                replace validsearch = 0
            }
            else {
                replace validsearch = 0 if !ustrregexm(`piece'`nvars', "<name>.+?</name>")
            }
            qui sum validsearch
            local tmpsum = `r(sum)'

            * Post one row per (centre point, slot) that has at least one
            * non-empty field.
            forvalues row_i = 1/`=_N' {
                forvalues var_i = 1/`nvars' {
                    local postloc = "(centerid[`row_i'])"
                    local Flag 0
                    forvalues r_i = 1/`result_num' {
                        local cell = `prefix'`result_`r_i''`var_i'[`row_i']
                        local postloc = `"`postloc' ("`cell'") "'
                        if ("`cell'" != "") {
                            local Flag = 1
                        }
                    }
                    if (`Flag') {
                        post `poster' `postloc'
                    }
                }
            }

            * Drop exactly the per-page work variables created above; a
            * wildcard such as lat* would also remove the user's own variables.
            forvalues var_i = 1/`nvars' {
                forvalues r_i = 1/`result_num' {
                    drop `prefix'`result_`r_i''`var_i'
                }
                drop `piece'`var_i'
            }
            local pagenum = `pagenum' + 1
        }
        postclose `poster'

        * ---- attach the collected places to the centre points ---------------
        merge 1:m centerid using `"`hits'"'
        cap confirm variable `prefix'address, exact
        if !_rc {
            replace `prefix'address = "未找到地址" if _merge != 3
        }
        drop _merge
        bys centerid: gen locid = _n

        * lat/lng exist only when requested in result(); -exact- keeps an
        * abbreviation from matching a user variable such as "latitude".
        foreach coord in lat lng {
            cap confirm variable `prefix'`coord', exact
            if !_rc {
                rename `prefix'`coord' `prefix'loc_`coord'
                destring `prefix'loc_`coord', replace
            }
        }
        sort centerid locid
        order centerid locid

        if "`seefilter'" == "" {
            drop `prefix'filter_type `prefix'sort_name
        }
        drop `baidumap'
        drop validsearch
        cap confirm variable `prefix'distance, exact
        if !_rc {
            cap destring `prefix'distance, replace
        }
    }
end

cap mata mata drop filter_token()
mata
// Split the filter() argument into its parts and return them to the caller
// as locals: filter_type, filter_sort, and filter_error ("1" unless the
// argument has exactly one or two tokens).
void function filter_token(string scalar filter_list) {
    string rowvector filter_vector

    filter_vector = tokens(filter_list)
    st_local("filter_error", "0")
    if (cols(filter_vector) == 1) {
        st_local("filter_type", filter_vector[1, 1])
        st_local("filter_sort", "")
    } else if (cols(filter_vector) == 2) {
        st_local("filter_type", filter_vector[1, 1])
        st_local("filter_sort", filter_vector[1, 2])
    } else st_local("filter_error", "1")
}
end

cap mata mata drop result_token()
mata
// Check every token of the result() argument against the list of supported
// field names. Sets locals result_1 ... result_k for the tokens accepted
// before the first unknown one, result_num (the total token count) and
// result_error ("1" as soon as an unknown token is met).
void function result_token(string scalar result_list) {
    string rowvector result_vector
    string rowvector all_vector
    real scalar i

    result_vector = tokens(result_list)
    all_vector = tokens("name lat lng address province city area telephone tag detail_url distance overall_rating service_rating environment_rating hygiene_rating facility_rating")
    st_local("result_error", "0")
    for (i = 1; i <= cols(result_vector); i++) {
        if (!anyof(all_vector, result_vector[1, i])) {
            st_local("result_error", "1")
            break
        }
        st_local(sprintf("result_%g", i), result_vector[1, i])
    }
    st_local("result_num", sprintf("%g", cols(result_vector)))
}
end