/*-------------------------------------------------------------------------------
  safemerge.ado: a wrapper for -merge- automatically resolving storage type
                 mismatches between master and using datasets using -tostring-

    Copyright (C) 2016  Daniel Bela (daniel.bela@lifbi.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see https://www.gnu.org/licenses/.

-------------------------------------------------------------------------------*/
*! version 1.1 15 July - enhancement in parsing variable lists, now also checking merge identifiers
*! version 1.0.1 14 July - bugfix for not keeping master variables and observations if there was no data type conversion
*! version 1.0 06 July - initial release
*! safemerge.ado: a wrapper for -merge- automatically resolving storage type mismatches between master and using datasets using -tostring-
*! Daniel Bela (daniel.bela@lifbi.de), Leibniz-Institute for Educational Trajectories e.V.
*! (LIfBi), Germany

// safemerge: wrapper around -merge-.
//   Syntax: identical to -merge-; the complete command line is handed on to
//           -merge- (with the using file possibly replaced by a temporary,
//           type-converted copy).
//   Before merging, every variable that exists in both the master and the using
//   dataset (restricted to merge identifiers and keepusing() variables if
//   keepusing() was specified) is checked; a variable that is string in one
//   dataset and numeric in the other is converted to string in the dataset
//   where it is numeric.
//   Returns: r(convertcount_master), r(convertcount_using) (scalars) and, if
//            anything was converted, r(convertlist_master), r(convertlist_using).
program define safemerge , rclass
	// Stata version information: use unicode functions for string comparisons in Stata 14 and newer
	if (`c(stata_version)'>=14) {
		local substr_fct usubstr
		local regexm_fct ustrregexm
		local regexs_fct ustrregexs
		version 14
	}
	else {
		local substr_fct substr
		local regexm_fct regexm
		local regexs_fct regexs
		version 12
	}
	// declare macros
	local plural
	local allparams
	local usingpos
	local usingfile
	local newparams
	local safeusing
	local masterlist
	local usinglist
	local bothlist
	local string_in_master
	local string_in_using
	local num_in_master
	local num_in_using
	local convert_in_master
	local convertcount_master 0
	local convert_in_using
	local convertcount_using 0
	local testvar
	local word
	local idvarlist
	local restrictvarlist
	local keepusingvarlist
	local restoresnapshot
	local rc 0
	local mergetypes "_n" "1:1" "1:m" "m:1" "m:m" "n:1" "1:n" "n:n" "m:n" "n:m"
	// parse parameters (especially: detect using file name for checking variables!)
	local allparams `macval(0)'
	local usingpos: list posof "using" in allparams
	if (`usingpos'==0) {
		display as error "using required"
		exit 100
	}
	local usingfile : word `=`usingpos'+1' of `allparams'
	// everything before "using" that is not a merge type keyword is a merge identifier;
	// merge type keywords are skipped at ANY position (not only the first), so that
	// the "_n" of a sequential merge ("1:1 _n") is never mistaken for a variable name
	foreach word of local allparams {
		if (`: list word in mergetypes') continue
		if (`"`word'"'=="using") continue , break
		local idvarlist : list idvarlist | word
	}
	// the using file name is replaced by a deferred reference to local safeusing,
	// which is only resolved when -merge- is finally called below
	local newparams : subinstr local allparams `"`usingfile'"' `"\`safeusing'"'
	local safeusing `usingfile'
	// if parameters contain an option 'keepusing(varlist)', extract varlist and only compare these variables later on
	if (`regexm_fct'(`"`allparams'"',`"keepus(i|in|ing)?\(([^\).]+)\)"')==1) {
		local keepusingvarlist=`regexs_fct'(2)
	}
	// compare file variable lists
	local restrictvarlist : list idvarlist | keepusingvarlist
	unab masterlist : _all
	quietly : describe using `"`usingfile'"' , varlist
	local usinglist `r(varlist)'
	local bothlist : list masterlist & usinglist
	if (!missing(`"`keepusingvarlist'"')) local bothlist : list bothlist & restrictvarlist
	// if matching variables are present in both files (and at least the merge identifiers should be): check their storage types
	if (!missing(`"`bothlist'"')) {
		// classify shared variables in the master dataset (currently in memory)
		foreach testvar of local bothlist {
			if (`substr_fct'(`"`: type `testvar''"',1,3)=="str") local string_in_master : list string_in_master | testvar
			else local num_in_master : list num_in_master | testvar
		}
		// save data snapshot, switch to using dataset
		quietly : snapshot save , label("masterfile for safemerge.ado")
		local restoresnapshot `r(snapshot)'
		// everything that happens while the using data is in memory is captured, so
		// that the master data is restored (and the snapshot erased) even on error
		capture noisily {
			// one observation is sufficient for checking storage types
			quietly : use `bothlist' using `"`usingfile'"' in 1 , clear
			foreach testvar of local bothlist {
				if (`substr_fct'(`"`: type `testvar''"',1,3)=="str") local string_in_using : list string_in_using | testvar
				else local num_in_using : list num_in_using | testvar
			}
			// compare lists
			local convert_in_master : list num_in_master & string_in_using
			local convert_in_using : list num_in_using & string_in_master
			// convert everything that needs to be converted in using
			if (!missing(`"`convert_in_using'"')) {
				if (missing(`"`keepusingvarlist'"')) quietly : use `"`usingfile'"' , clear
				else quietly : use `restrictvarlist' using `"`usingfile'"' , clear
				unstring `convert_in_using' , dataset(using)
				local convertcount_using : word count `convert_in_using'
				// temporarily save converted using data; -merge- will use this file
				tempfile safeusing
				quietly : save `"`safeusing'"'
			}
		}
		local rc=_rc
		// jump back to master
		snapshot restore `restoresnapshot'
		snapshot erase `restoresnapshot'
		if (`rc'!=0) exit `rc'
		// convert everything that needs to be converted in master
		if (!missing(`"`convert_in_master'"')) {
			unstring `convert_in_master' , dataset(master)
			local convertcount_master : word count `convert_in_master'
		}
	}
	// display and return results
	foreach dataset in master using {
		if (`convertcount_`dataset''>0) {
			if (`convertcount_`dataset''>1) local plural s
			else local plural
			display as text in smcl `" Note: {cmd:safemerge} force-converted {it:`convertcount_`dataset''} variable`plural' in `dataset' dataset to string: {it:`convert_in_`dataset''}"'
			return local convertlist_`dataset' `convert_in_`dataset''
		}
		return scalar convertcount_`dataset'=`convertcount_`dataset''
	}
	// merge with safe-to-merge dataset
	merge `newparams'
	// exit
	exit 0
end

// sub-program force-converting a list of variables to string using -tostring-
//   varlist:       variables of the dataset in memory to convert (at least one)
//   dataset(name): optional; name of the dataset the variables belong to, only
//                  used in the characteristic text (default: using)
//   Missing values become empty strings; each converted variable is marked with
//   the characteristic [safemerge], and the characteristic [tostring] is cleared.
program unstring
	syntax varlist(min=1) [ , dataset(name) ]
	if (missing(`"`dataset'"')) local dataset using
	tempvar marker
	foreach var of local varlist {
		quietly {
			// remember missing values: -tostring- would render them as "."
			generate `marker'=(missing(`var'))
			tostring `var' , replace force usedisplayformat
			replace `var'="" if (`marker')
			drop `marker'
		}
		char define `var'[safemerge] force-converted to string in `dataset' file by safemerge.ado
		char define `var'[tostring]
	}
	exit 0
end
// EOF