*! version 1.2.6 07oct2022 I I Bolotov
program def pyconvertu
    version 16.0
    /*
        By default, this small program converts country names (in English) to
        ISO 3166-1 codes (alpha-2, alpha-3, and numeric) and to full names in
        English and French using regular expressions with Unicode support.
        The from() option allows the user to import an external JSON file as
        a dictionary that replaces the default classification. Such a JSON
        file can be created from the data in memory (__dump), provided that
        the first variable contains the headings "Data", "Metadata", and
        "Sources", each immediately followed by its content.

        Accepted values of the name argument:
            <varname>         convert the variable to the to() classification
            __classification  store or print the to() classification itself
            __info            print the metadata and sources of the JSON file
            __dump            write the data in memory to the to() JSON file

        The template JSON file structure is the following ("regex" is a
        compulsory key and the file is saved using Python's json.dump()):

        [
            {
                "regex":"^(.*afgh.*|\\s*AFG\\s*|\\s*AF\\s*|\\s*4\\s*)$",
                "name_en":"Afghanistan",                # classification A
                "name_fr":"Afghanistan (l')",           # classification B
                "iso3":"AFG",                           # ...
                "iso2":"AF",
                "isoN":"4"
            },
            ...
        ]

        Author: Ilya Bolotov, MBA, Ph.D.
        Date: 20 July 2021
    */
    syntax                                                                  ///
    name(name=name) [, to(string) Generate(string) replace print]          ///
    [from(string) *]
    tempname json sections_n vars
    tempvar converted
    // import classification (fall back to the file shipped with the package)
    if trim(`"`from'"') == "" {
        qui findfile `"pyconvertu_classification.json"'
        local from `"`r(fn)'"'
    }
    // convert the variable to classification
    cap confirm variable `name'
    if ! _rc {
        /* check options for errors                                         */
        // check for missing options
        if trim(`"`to'"') == "" {
            di as err "option to() required"
            exit 198
        }
        if trim(`"`replace'`generate'`print'"') == "" {
            di as err "must specify either generate, replace, or print option"
            exit 198
        }
        /* ado + python                                                     */
        cap python: l = _pyconvertu(r'`from'', Data.get('`name''), '`to'')
        if _rc {
            di as err "JSON file does not contain valid entry/dictionary"
            exit 7102
        }
        // store the converted variable
        if trim(`"`print'"') == "" {
            g `converted' = ""
            python: Data.store('`converted'', None, l)
            // generate a new variable
            if trim(`"`generate'"') != "" {
                g `generate' = `converted'
            }
            // replace the existing one
            if trim(`"`replace'"') != "" {
                replace `name' = `converted'
            }
        }
        // print only
        else {
            python: print('`"' + '""'.join(l).replace('""', '"\', `"') + '"\'')
        }
        exit 0
    }
    // save classification to a variable
    if trim(`"`name'"') == "__classification" {
        /* check options for errors                                         */
        if trim(`"`to'"') == "" {
            di as err "option to() required"
            exit 198
        }
        if trim(`"`generate'`print'"') == "" {
            di as err "must specify either generate or print option"
            exit 198
        }
        /* ado + python                                                     */
        cap python: l = _pyconvertu_list(r'`from'', '`to''); l.sort()
        if _rc {
            di as err "JSON file does not contain valid entry/dictionary"
            exit 7102
        }
        // number of observations missing to hold the whole classification
        python: n = len(l) - Data.getObsTotal()
        // store the classification
        if trim(`"`print'"') == "" {
            g `converted' = ""
            python: Data.addObs((lambda n: n if n > 0 else 0)(n))
            python: Data.store('`converted'', [i for i in range(0, len(l))], l)
            // generate a new variable
            if trim(`"`generate'"') != "" {
                g `generate' = `converted'
            }
        }
        // print only
        else {
            python: print('\`"' + '"\' \`"'.join(l) + '"\'')
        }
        exit 0
    }
    // print metadata and sources
    if trim(`"`name'"') == "__info" {
        cap python: l = _pyconvertu_info(r'`from'')
        if _rc {
            di as err "JSON file does not contain valid entry/dictionary"
            exit 7102
        }
        python: print(f'\n'.join(l))
        exit 0
    }
    // dump classification from data to a json file
    if trim(`"`name'"') == "__dump" {
        qui ds
        local `vars' = r(varlist)
        /* find _n of the json sections                                     */
        forvalues n = 1/`=_N' {
            if regexm(`=word("``vars''", 1)'[`n'], "^\s*(Data|Meta|Sources)") {
                local `sections_n' "``sections_n'' `n'"
            }
        }
        /* ado + python                                                     */
        // the JSON text is accumulated in a string scalar, section by section
        scalar `json' = ""
        cap preserve
        forvalues i = 1/`=wordcount("``sections_n''")' {
            local n = word("``sections_n''", `i')
            restore, preserve
            qui {
                // isolate each json section
                drop if _n < `n'
                cap drop if _n > `=real(word("``sections_n''", `i' + 1)) - `n''
                drop if mi(`=word("``vars''", 1)')
                if regexm(`=word("``vars''", 1)'[1], "^\s*Data") {
                    // row 2 holds the keys: turn every cell into "key": "value"
                    foreach var in ``vars'' {
                        tostring `var', replace force
                        replace `var' = `""`=`var'[2]'": ""' + `var' + `"""'
                    }
                    // Data (the classification)
                    drop if _n <= 2
                    egen `converted' = concat(``vars''), punct(", ")
                    replace `converted' = "{" + `converted' + "}"
                    levelsof `converted', clean s(", ")
                    scalar `json' = `json' + r(levels) + ", "
                }
                if regexm(`=word("``vars''", 1)'[1], "^\s*Meta") {
                    // Metadata
                    drop if _n <= 2
                    g `converted' = `"""' + `=word("``vars''", 1)' +       ///
                    `"": ""' + `=word("``vars''", 2)' + `"""'
                    levelsof `converted', clean s(", ")
                    scalar `json' = `json' + `"{"metadata": {"' +          ///
                    r(levels) + "}}, "
                }
                if regexm(`=word("``vars''", 1)'[1], "^\s*Sources") {
                    // Sources
                    drop if _n <= 2
                    g `converted' = `""["' + `=word("``vars''", 2)' +      ///
                    "](" + `=word("``vars''", 1)' + `")""'
                    levelsof `converted', clean s(", ")
                    scalar `json' = `json' + `"{"sources": ["' +           ///
                    r(levels) + "]}, "
                }
            }
        }
        python: _pyconvertu_dump(r'`to'', Scalar.getString('`json''))
        exit 0
    }
    // or display error
    di as err                                                               ///
    "must specify either a variable, __classification, __info, or __dump"
    exit 198
end

* Python 3 code ***********
version 16.0
python:
# Stata Function Interface
from sfi import Data, Scalar
# Python Modules
import json
import os
import re

# User-defined Exceptions
class PyConvertUError(Exception):
    """
        /*
        Raised when the JSON file cannot be read or does not contain a valid
        classification. The ado code only checks the return code of the
        python call, so any instance of this error maps to exit code 7102.
        */
    """
    pass

# User-defined Functions
def _pyconvertu_load(source_file=r''):
    """
        /*
        Returns the parsed content of a JSON file (source_file). The file is
        always read as UTF-8 so that the result does not depend on the
        platform default encoding.
        */
    """
    with open(os.path.expanduser(source_file), encoding='utf-8') as f:
        return json.load(f)

def _pyconvertu_is_info(entry):
    """
        /*
        Returns True for the metadata and sources entries of a JSON file,
        False for the entries of the classification itself.
        */
    """
    return bool(entry.get('metadata') or entry.get('sources'))

def _pyconvertu(
    source_file=r'', from_list=None, to_classification='', *args, **kwargs
):
    """
        /*
        Converts a list of strings (from_list) to classification
        (to_classification) based on a JSON file (source_file), unmatched
        strings are returned unchanged. When several entries match a string,
        the first one in file order is used.
        Raises PyConvertUError if the file cannot be read or processed.
        */
    """
    try:
        #// load classification, skipping the metadata and sources entries
        classification = [
            d for d in _pyconvertu_load(source_file)
            if not _pyconvertu_is_info(d)
        ]
        #// compile every pattern once instead of once per converted value
        patterns = [
            (re.compile(d.get('regex'), flags=re.I|re.M), d)
            for d in classification
        ]
        #// convert list
        converted = []
        for s in ([] if from_list is None else from_list):
            match = next((d for p, d in patterns if p.search(s)), None)
            converted.append(
                s if match is None else match.get(to_classification)
            )
        return converted
    except Exception as err:
        raise PyConvertUError('cannot convert with the JSON file') from err

def _pyconvertu_list(
    source_file=r'', from_classification='', *args, **kwargs
):
    """
        /*
        Creates a list of strings from classification (from_classification)
        based on a JSON file (source_file). Entries without the requested key
        yield None.
        Raises PyConvertUError if the file cannot be read or processed.
        */
    """
    try:
        #// load classification and create list
        return [
            d.get(from_classification)
            for d in _pyconvertu_load(source_file)
            if not _pyconvertu_is_info(d)
        ]
    except Exception as err:
        raise PyConvertUError('cannot list the JSON file') from err

def _pyconvertu_info(
    source_file=r'', *args, **kwargs
):
    """
        /*
        Returns a list with the string representation of the metadata and
        sources entries of a JSON file (source_file).
        Raises PyConvertUError if the file cannot be read or processed.
        */
    """
    try:
        #// load classification metadata and create list
        return [
            str(d)
            for d in _pyconvertu_load(source_file)
            if _pyconvertu_is_info(d)
        ]
    except Exception as err:
        raise PyConvertUError('cannot read the JSON file info') from err

def _pyconvertu_dump(
    target_file=r'', json_string='', *args, **kwargs
):
    """
        /*
        Writes JSON string to a JSON file (target_file). The .json extension
        is appended to target_file unless it is already there. The string is
        expected to be a sequence of JSON objects, each followed by a comma
        and a space, as assembled by the ado code.
        */
    """
    #// only touch the end of the path, folder names may contain .json too
    if not target_file.endswith('.json'):
        target_file += '.json'
    #// drop the trailing separator and double the backslashes (regex
    #// escapes), parse before opening so a bad string leaves no empty file
    classification = json.loads(
        '[' + json_string[0:-2].replace('\\', '\\\\') + ']'
    )
    with open(os.path.expanduser(target_file), 'w', encoding='utf-8') as f:
        #// dump classification and print message
        json.dump(classification, f)
    print('JSON file \'' + target_file + '\' created')
end