***********************************************************
*** 1. Exporting variables from World Bank
***********************************************************
* The download step below is disabled (block comment). Section 3 therefore
* needs an existing wdi_investment.dta in the working directory.

*** Download investment data
/*
wbopendata, indicator(NE.GDI.FTOT.ZS; NE.GDI.FTOT.KD; NY.GDP.PCAP.KD; SI.POV.GINI; NY.GDP.MKTP.KD.ZG; SP.POP.GROW; SE.PRM.ENRR; FP.CPI.TOTL.ZG) clear latest
sort countryname
reshape long yr, i(countryname indicatorcode) j(year)
drop indicatorname
gen indic=strtoname(indicatorcode,0)
drop indicatorcode
reshape wide yr, i(countryname countrycode year) j(indic) string

rename yrNE_GDI_FTOT_ZS wb_invgdp
rename yrNE_GDI_FTOT_KD wb_invusd
rename yrNY_GDP_PCAP_KD wb_gdpcapf
rename yrSI_POV_GINI wb_gini
rename yrNY_GDP_MKTP_KD_ZG wb_gdpgrw
rename yrSP_POP_GROW wb_popgrw
rename yrSE_PRM_ENRR wb_prmenr
rename yrFP_CPI_TOTL_ZG wb_cpitot

label var wb_invgdp "Gross fixed capital formation (% of GDP), WDI"
label var wb_invusd "Gross fixed capital formation (constant 2005 US$), WDI"
label var wb_gdpcapf "GDP per capita (constant 2005 US$), WDI"
label var wb_gini "GINI index (World Bank estimate)"
label var wb_gdpgrw "GDP growth (annual %)"
label var wb_popgrw "Population growth (annual %)"
label var wb_prmenr "Primary school enrollment, %, WDI"
label var wb_cpitot "Inflation, consumer prices (annual %)"

save wdi_investment.dta, replace
*/

***********************************************************
*** 2. Extracting variables from CNTS
***********************************************************
* Reads CNTSdata, renames and labels the variables used later, and writes
* cnts_worfile.dta.
use CNTSdata, clear
rename *, lower

* Education
rename school02 sch_prim
rename school04 sch_secn
rename school09 sch_univ
rename school12 sch_liter
* Rescale literacy by a factor of 0.1 (the label below reports it in %).
replace sch_liter = sch_liter * .1

label variable sch_prim  "Primary Enrollment per 10,000 population"
label variable sch_secn  "Secondary Enrollment per 10,000 population"
label variable sch_univ  "University Enrollment per 10,000 population"
label variable sch_liter "Literacy Rate among age>15, %"

summarize sch_* if year > 1960

* GDP (not available after 2000)
rename economics1 ec_inccap
rename economics2 ec_gdpcap
rename economics3 ec_gnpcap
rename economics6 ec_exchrt

label variable ec_inccap "National Income per Capita"
label variable ec_gdpcap "Gross Domestic Product Per Capita (Factor Cost)"
label variable ec_gnpcap "Gross National Product Per Capita (Market Prices)"
* The dollar sign is escaped: inside double quotes Stata would otherwise
* expand $US as a global macro and silently remove it from the label.
label variable ec_exchrt "Official/Princial Exchange Rate, Local Currency/ \$US {.01}"

* Political instability
rename domestic1 pi_assas
rename domestic2 pi_strike
rename domestic3 pi_gorwar
rename domestic4 pi_gvcris
rename domestic5 pi_purge
rename domestic6 pi_riot
rename domestic7 pi_revol
rename domestic8 pi_demon
rename domestic9 pi_index

label variable pi_assas  "Assasinations"
label variable pi_strike "General Strikes (1000+, aimed at gov)"
label variable pi_gorwar "Guerilla Warfare"
label variable pi_gvcris "Government Crises"
label variable pi_purge  "Purges"
label variable pi_riot   "Riots (100+ using physical force)"
label variable pi_revol  "Revolutions (attempted change of gov)"
label variable pi_demon  "Anti-gov Demonstrations (100+, peaceful)"
label variable pi_index  "Weighted Index of Conflict measures"

* Population, Urbanization, democracy
rename pop1 cn_pop
rename polit02 cn_democ_dum
* Collapse codes 2, 3 and 4 into 0; other values are left untouched.
recode cn_democ_dum (2 3 4 = 0)
* NOTE(review): this is a plain ratio of urban01 to cn_pop while the label
* calls it a percentage - confirm the units of both inputs.
gen cn_urban = urban01 / cn_pop

label variable cn_pop       "Total Population"
label variable cn_democ_dum "=1 if Civilian Government"
label variable cn_urban     "% pop in city>100,000"

keep code wbcode country year sch_* pi_* ec_* cn_*
save cnts_worfile.dta, replace

***********************************************************
*** 3. Merging CNTS and investment data
***********************************************************
* Maps CNTS country codes to the codes used in wdi_investment.dta, merges
* the two files on country-year, keeps matched rows only and writes
* cnts_inv_combined.dta.
use cnts_worfile.dta, clear

label var wbcode "3-letter country code in CNTS"
gen countrycode = wbcode
label var countrycode "3-letter country code in WDI"

* Codes that differ between the two sources
replace countrycode = "ROU" if wbcode == "ROM"
replace countrycode = "SGP" if wbcode == "SIN"
replace countrycode = "ARE" if wbcode == "UAE"
replace countrycode = "COD" if wbcode == "ZAR"
replace countrycode = "SRB" if country == "Serbia"
replace countrycode = "MNE" if country == "Montenegro"
replace countrycode = "KSV" if country == "Kosovo"
replace countrycode = "TLS" if code == "0335"

drop if countrycode == ""
drop if inlist(code, "1247", "1145")
* Diagnostic only: lists country-years that are still duplicated.
duplicates list countrycode year

rename country cntry_cnts
label var cntry_cnts "Country name in CNTS"

merge 1:1 countrycode year using wdi_investment.dta
drop if year > 2011
drop if region == "Aggregates"
tabulate year _merge
keep if _merge == 3
drop _merge code

rename countryname cntry_wdi
label var cntry_wdi "Country name in WDI"
order country* cntry*

**Generate merge ID**
gen yearstr = string(year)
gen countrycodeid = countrycode + yearstr

save cnts_inv_combined.dta, replace

***********************************************************
*** 4. Creating inequality data
***********************************************************
* Reduces wiiderOriginal.dta to one Gini observation per country-year,
* merges it with cnts_inv_combined.dta and writes Merged_CW_Full.dta.

**Wiider shaping**
use "wiiderOriginal.dta", clear
set more off

* Row identifier in file order. It is used as the last sort key in the
* duplicate rules below so that ties are always broken the same way; it is
* dropped again before the merge.
gen long wiid_row = _n

***Install kountry and run***
* Only contact SSC when the command is not installed yet, so the script
* also runs without a network connection.
capture which kountry
if _rc {
    ssc install kountry
}

**Convert to standard country code**
kountry country, from(other) stuck
rename _ISO3N_ iso3n

**Convert Standard to World Bank Country Code
kountry iso3n , from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop iso3n

***Fix Conversion issues*
replace countrycode = "BIH" if countrycode3 == "BIH"
replace countrycode = "CAF" if countrycode3 == "CAF"
replace countrycode = "COD" if countrycode3 == "COD"
replace countrycode = "CZE" if countrycode3 == "CSK"
replace countrycode = "ZWE" if countrycode3 == "ZWE"

**Generate merge ID**
gen yearstr = string(year)
gen countrycodeid = countrycode + yearstr

***Wiider Dropping and labeling***
keep countrycode3 country year mean gini quality welfaredefn equivsc ///
    areacovr uofanala year_cat countrycode yearstr countrycodeid wiid_row

label variable countrycode3  "Wiider 3 digit country code"
label variable mean          "Sample Mean of Gini year"
label variable gini          "Reported Gini Measurement"
label variable quality       "Quality of Gini measurement"
label variable areacovr      "Area Coverage (Rural/Urban)"
label variable countrycodeid "Merge ID"
label variable countrycode   "Standardized World Bank country code"

***Fix capitalization and spelling issues**
rename *, lower
keep country year gini countrycode3 quality areacovr welfaredefn uofanala ///
    equivsc countrycode yearstr countrycodeid wiid_row
* Fold code 77 (labelled "Utban" below) into code 70 ("Urban").
replace areacovr = 70 if areacovr == 77

**(drops 200 missing observations)**
drop if missing(gini)

**Drop Consumption-based observations**
**(drops 847 (200 nonduplicate) observations)**
drop if welfaredefn == 1

***PLACE DESIRED OBSERVATIONS FIRST***
* The preferred category of each attribute is recoded to 1 so that it sorts
* first within a country-year.

***Make AreaCoverage=All coded as =1**
* Order matters: the old code 1 is moved out of the way before 7 becomes 1.
replace areacovr = 98 if areacovr == 1
replace areacovr = 1 if areacovr == 7
label define AreaCovr1 ///
    1 "All" ///
    2 "28 main cities" ///
    3 "4 principal cities" ///
    4 "4 rural areas" ///
    5 "7 principal cities" ///
    6 "Agricultural sector" ///
    8 "All, 8 districts in north and east excl. (15% of pop)" ///
    9 "All, excl Costa Rural, Selva Rural and Selva Urbana" ///
    10 "All, excl Costa Rural, Selva Rural and Selva Urbana (30% of pop)" ///
    11 "All, excl Tirana" ///
    12 "All, excl. Abkhasia and Tskhinvali" ///
    13 "All, excl. East Timor" ///
    14 "All, excl. East-Central State" ///
    15 "All, excl. West Irian and East Timor" ///
    16 "All, excl. West Irian, East Timor and Maluku" ///
    17 "All, excl. northern and eastern provinces" ///
    18 "All, excl. seven districts on national level" ///
    19 "All, excl. some special areas (4% of population thereby excluded)" ///
    20 "All, excl.Transdniestr" ///
    21 "All, mainly urban areas" ///
    22 "Asuncion" ///
    23 "Capital" ///
    24 "Cities" ///
    25 "Cities (n=16)" ///
    26 "Cities (n=17)" ///
    27 "Cities (n=4)" ///
    28 "Cities (n=7)" ///
    29 "Cities (n=8)" ///
    30 "Continental Portugal" ///
    31 "DGEC" ///
    32 "ECH-National" ///
    33 "ECV" ///
    34 "EMNV" ///
    35 "ENAHO 1" ///
    36 "ENAHO 2" ///
    37 "ENAHO 3" ///
    38 "ENCOVI" ///
    39 "ENEI" ///
    40 "ENEMDU" ///
    41 "ENFT 1" ///
    42 "ENFT 2" ///
    43 "ENFT 3" ///
    44 "ENFT 4" ///
    45 "ENH-National" ///
    46 "ENH-Urban" ///
    47 "EPED-National" ///
    48 "EPHC" ///
    49 "East" ///
    50 "Estate sector" ///
    51 "Four areas of Brabados" ///
    52 "GEIH-National" ///
    53 "Greater Buenos Aires" ///
    54 "INE" ///
    55 "Interior (the parts outside the capital)" ///
    56 "Interior, Capitales Departamentales only" ///
    57 "Java" ///
    58 "Main island" ///
    59 "Metro" ///
    60 "Metropolitan area" ///
    61 "New PNAD" ///
    62 "Nonagricultural sector" ///
    63 "Nonmetropolian area" ///
    64 "Peninsular Malaysia" ///
    65 "Rural" ///
    66 "Rural, excl. seven districts on national level" ///
    67 "Six northern provinces" ///
    68 "Three cantons" ///
    69 "Two major cities" ///
    70 "Urban" ///
    71 "Urban (Santo Domingo)" ///
    72 "Urban villages" ///
    73 "Urban, Omdurman" ///
    74 "Urban, excl. Western Province" ///
    75 "Urban, excl. metro" ///
    76 "Urban, excl. seven districts on national level" ///
    77 "Utban" ///
    78 "West" ///
    79 "With Implicit Rent" ///
    80 "With Rural North" ///
    81 "Without Implicit Rent" ///
    98 "15 Main Citiies", replace

***Make Unitofanalysis=Person coded as =1**
* Order matters: the old code 1 is moved to 5 before 3 becomes 1.
replace uofanala = 5 if uofanala == 1
replace uofanala = 1 if uofanala == 3
label define UofAnala1 1 "Person" 2 "Household" 3 "NA" 4 "Tax Unit" 5 "Family", replace

***Make WelfareDefinition=Income,Disposable is coded as =1**
* Code 1 is free at this point because its rows were dropped above.
replace welfaredefn = 1 if welfaredefn == 12
label define Welfaredefn1 ///
    1 "Income,Disposable" ///
    2 "Disposable Income" ///
    3 "Earnings, .." ///
    4 "Earnings, Gross" ///
    5 "Earnings, Net" ///
    6 "Factor Income" ///
    7 "Income" ///
    8 "Income Disposable, from taxable items" ///
    9 "Income, .." ///
    10 "Income, Factor" ///
    11 "Income, Gross" ///
    12 "NA" ///
    13 "Income,Net" ///
    14 "Income/Consumption" ///
    15 "Market Income" ///
    16 "Monetary Income" ///
    17 "Monetary Income, .." ///
    18 "Monetary Income, Disposable" ///
    19 "Monetary Income, Disposable (excluding property income)" ///
    20 "Monetary Income, Disposable, excl. self-empl. and property income" ///
    21 "Monetary Income, Gross" ///
    22 "Primary Income" ///
    23 "Taxable Income" ///
    24 "Taxable Income, Disposable" ///
    25 "Taxable Income, Gross" ///
    26 "Taxable Income, Gross incl deductions" ///
    27 "Taxable Income, Net" ///
    28 "Taxable Income, property income excluded" , replace

***SORT AND DROP DUPLICATES BY RULES**
* Every rule numbers the rows within a country-year (0 when the row is
* alone, otherwise 1, 2, ...) and drops rows numbered 2 or higher that lack
* the preferred attribute. Row 1 of a group is never dropped, so each
* country-year keeps at least one row. wiid_row is the last sort key, which
* makes the order, and therefore the surviving rows, the same on every run.
sort countrycode year quality areacovr uofanala welfaredefn wiid_row

***RULE 1: drop duplicates if values are not quality=High or Average**
by countrycode year: gen dup1 = cond(_N==1, 0, _n)
drop if dup1 > 1 & !(quality == 1 | quality == 2)

***RULE 2: drop duplicates if values are not areacoverage=All**
by countrycode year: gen dup2 = cond(_N==1, 0, _n)
drop if dup2 > 1 & areacovr != 1

***RULE 3: drop duplicates if values are not UofAnalysis=Person**
by countrycode year: gen dup3 = cond(_N==1, 0, _n)
drop if dup3 > 1 & uofanala != 1

***RULE 4: drop duplicates if values are not WelfareDef=DisposableIncome**
by countrycode year: gen dup4 = cond(_N==1, 0, _n)
drop if dup4 > 1 & welfaredefn != 1

***RULE 5: drop duplicates if values are not quality=High**
by countrycode year: gen dup5 = cond(_N==1, 0, _n)
drop if dup5 > 1 & quality != 1

* Unnumbered rule in the original: among remaining duplicates keep only
* rows with equivsc code 19 (row 1 of each group is always kept).
* NOTE(review): the meaning of code 19 is not documented here - confirm.
by countrycode year: gen dup6 = cond(_N==1, 0, _n)
drop if dup6 > 1 & equivsc != 19

***Generate Average measurement for remaining country years with duplicates**
by countrycode year, sort: egen avqualgini = mean(gini)

***Drop remaining duplicates***
* Restore the deterministic order before keeping the first row of each
* group, so the kept row does not depend on how the step above reordered
* the data.
sort countrycode year quality areacovr uofanala welfaredefn wiid_row
by countrycode year: gen dup7 = cond(_N==1, 0, _n)
drop if dup7 > 1
replace gini = avqualgini

keep country year gini countrycode3 quality areacovr welfaredefn uofanala ///
    equivsc countrycode yearstr countrycodeid

**Merge with CNTS and Investment file**
merge 1:1 countrycodeid using "cnts_inv_combined.dta"
save "Merged_CW_Full.dta", replace

**MERGED DATA SHAPING***
rename _merge mergematchinfo
label define _merge 1 "Wiider only (1)" 2 "CNTS only (2)" 3 "matched (3)", replace

keep year gini quality welfaredefn uofanala equivsc areacovr countrycode ///
    pi_* ec_* sch_* wb_* cn_* region regioncode mergematchinfo

* Add numeric codes and standardized names; rows whose code kountry could
* not convert (MARKER==0) are dropped.
kountry countrycode, from(iso3c) to(iso3n) marker
drop if MARKER == 0
rename NAMES_STD country
drop MARKER
rename _ISO3N_ iso3n

label variable iso3n   "Standardized Numeric UN country codes"
label variable country "Standardized UN country names"

order iso3n countrycode country year gini mergematchinfo
sort iso3n year
xtset iso3n year

save "Merged_CW_Full.dta", replace

* NOTE(review): the working directory changes here, between the save above
* and the reload below. Both refer to the same file only if the script was
* already running in this folder - confirm, or move this to the top.
cd "G:\Thesis"

*** Create additional variables
use "Merged_CW_Full.dta", clear
label define _merge 1 "Wiider only (1)" 2 "CNTS only (2)" 3 "matched (3)", replace
xtset iso3n year

gen lngdpcap = ln(wb_gdpcapf)
gen llngdpcap = l.lngdpcap /*1-year lag*/
label var llngdpcap "1-year lagged log of GDP per capita, in 2005 USD"

gen trend = year - 1959
label var trend "Time trend"
gen trend2 = trend^2
label var trend2 "Time trend squared"

* group() numbers the distinct values of region in sorted order.
* NOTE(review): the hand-written labels below assume that order yields
* exactly these seven regions - confirm against the data.
egen regid = group(region)
label define regid 1 "E Asia & Pacific" 2 "Europe & C Asia" 3 "Latin America" 4 "Middle East & N Africa" 5 "N America" 6 " S Asia" 7 "Sub-Saharan Africa"
label values regid regid

save "Merged_CW_Full.dta", replace