All the analyses are based on a large global dataset (and accompanying codebook), which we assembled from commonly used quantitative indicators on health, gender equality and several types of violence. The codebook, including variable definitions and summary statistics, is available as a Google Sheet in the shared Google Drive of the Lancet Commission Metrics Working Group, or at this link (access via logging into the shared folder provides more functionality). The dataset is also available in the shared Dropbox folder of the Working Group. We begin by considering the following measures within the categories of health outcomes, gender inequalities, state-based violent conflict, one-sided violence, and societal violence. (The code also sets up vectors of variable names for later use.)
# Read the codebook sheet from the shared Google Sheet. De-authorising first
# lets googlesheets4 read the link-shared sheet without a login prompt.
link <- "https://docs.google.com/spreadsheets/d/1KLFTva--XHVBM-IX6qaPtuyzmIlRMnpyjUXfBdJPsag/edit?usp=sharing"
googlesheets4::gs4_deauth()
codebook <- googlesheets4::read_sheet(link, sheet = "codebook")

# Elementwise "is TRUE": returns FALSE wherever `x` is NA, so the result can
# be used directly as a logical subscript without producing NA entries.
IsTrue <- function(x) {
  !is.na(x) & x
}

# load() restores the objects stored in the file into the workspace; the rest
# of the script uses an object called `data` -- presumably stored in this
# file, TODO confirm.
load("_data/dataset_LSCMWG.RData")

# Keep observations from 1960 onwards.
# NOTE(review): `%>%` is assumed to be attached elsewhere (magrittr/dplyr).
data <- data %>%
  dplyr::filter(year >= 1960)
# data <- data[, names(data) %in% codebook$variable[codebook$flag %in% c("id", "maybe", "use")]]
# Group the codebook variables flagged "use" into thematic categories.
# Each element is a character vector of variable names taken from
# `codebook$variable`; IsTrue() turns NA comparisons into FALSE.
categories <- list(
  one_sided = codebook$variable[IsTrue(codebook$flag == "use") &
                                  codebook$sub_category %in% c("one-sided violence", "repression")],
  societal = codebook$variable[IsTrue(codebook$flag == "use") &
                                 IsTrue(codebook$sub_category == "societal violence")],
  conflict = codebook$variable[IsTrue(codebook$flag == "use") &
                                 IsTrue(codebook$sub_category == "state-based conflict")],
  # Gender variables that are neither education nor labor (or have no
  # sub-category at all).
  gender = codebook$variable[IsTrue(codebook$flag == "use") &
                               IsTrue(codebook$category == "gender") &
                               IsTrue(!codebook$sub_category %in% c("education", "labor") | is.na(codebook$sub_category))],
  gender_labor = codebook$variable[IsTrue(codebook$flag == "use") &
                                     IsTrue(codebook$category == "gender" & codebook$sub_category == "labor")],
  gender_educ = codebook$variable[IsTrue(codebook$flag == "use") &
                                    IsTrue(codebook$category == "gender" & codebook$sub_category == "education")],
  health = codebook$variable[IsTrue(codebook$flag == "use") &
                               codebook$category %in% c("health")]
)
# Show the resulting category lists.
categories
# Hand-picked sets of variable names used by later analyses.
variables <- list(
  measurement_models = c("latentmean_fariss", "v2cltort", "v2clkill", "v2caviol"),
  death_rates = c("deaths_all_int_rate", "deaths_civilians_int_rate",
                  "deaths_civilians_osv_rate", "deaths_all_nsc_rate",
                  "deaths_civilians_nsc_rate", "deaths_all_ucdp_rate", "hom_odcwho_rate"),
  conflict_incidence = c("conflict_internal", "war_internal", "conflict_non_state", "conflict_one_sided"),
  health_full = c("imr_wpp", "ufmr_wpp", "life_exp_wpp", "daly_ihme"),
  health = c("imr_wpp", "life_exp_wpp"),
  gender_full = c("labor_mod_ratio15_ilo", "wmn_parl_wdi", "tfr_wpp",
                  "asfr_adol_wpp", "daly_ratio_ihme", "mys_ratio_hdr",
                  "mys_ratio_bl", "mys_age_ratio_ihme", "mys_pop_ratio_ihme"),
  gender = c("asfr_adol_wpp", "mys_age_ratio_ihme"),
  political = c("polity2_p4", "dem_bmr", "v2x_polyarchy", "v2x_libdem",
                "v2x_liberal", "v2x_partipdem", "v2x_partip")
)
# Name every element of each set after itself, so that lapply() over a set
# returns a list named by variable.
variables <- lapply(variables, function(set) setNames(set, set))
Due to data availability, the time period for our analyses will be primarily from 1990 until 2015 or 2018.
# For every category, build a year-by-variable matrix holding the number of
# non-missing observations of each variable in each year.
lapply(categories, function(col_names) {
  counts <- lapply(col_names, function(col_name) {
    # Count non-NA values per year. Summing within each year (instead of
    # indexing the "TRUE" column of a cross-table) also works for a variable
    # that is missing everywhere, yielding zeros rather than an error.
    vapply(split(!is.na(data[[col_name]]), data$year), sum, integer(1))
  })
  counts <- do.call(cbind, counts)
  colnames(counts) <- col_names
  counts
})
## $one_sided
## deaths_civilians_osv_rate deaths_pitf_rate latentmean_fariss v2clkill v2cltort killing_estimate_mean_fariss
## 1960 0 0 109 108 108 0
## 1961 0 0 113 113 113 0
## 1962 0 0 119 119 119 0
## 1963 0 0 120 120 120 0
## 1964 0 0 123 123 123 0
## 1965 0 0 127 127 127 0
## 1966 0 0 131 131 131 0
## 1967 0 0 132 132 132 0
## 1968 0 0 135 135 135 0
## 1969 0 0 135 135 135 0
## 1970 0 0 136 136 136 0
## 1971 0 0 140 140 140 0
## 1972 0 0 140 140 140 0
## 1973 0 0 141 140 140 0
## 1974 0 0 143 141 141 0
## 1975 0 0 150 148 148 0
## 1976 0 0 151 148 148 0
## 1977 0 0 152 149 149 0
## 1978 0 0 154 150 150 0
## 1979 0 0 156 150 150 0
## 1980 0 0 157 151 151 0
## 1981 0 0 159 151 151 0
## 1982 0 0 159 151 151 0
## 1983 0 0 160 151 151 0
## 1984 0 0 161 151 151 0
## 1985 0 0 161 151 151 0
## 1986 0 0 162 151 151 0
## 1987 0 0 162 151 151 0
## 1988 0 0 162 151 151 0
## 1989 163 0 162 151 151 162
## 1990 165 0 163 151 151 163
## 1991 182 0 181 166 166 181
## 1992 185 0 184 168 168 184
## 1993 189 0 188 170 170 188
## 1994 191 0 188 171 171 188
## 1995 191 191 189 171 171 189
## 1996 191 191 188 171 171 188
## 1997 191 191 189 171 171 189
## 1998 191 191 189 171 171 189
## 1999 191 191 191 171 171 191
## 2000 191 191 191 171 171 191
## 2001 191 191 191 171 171 191
## 2002 192 192 192 172 172 192
## 2003 192 192 192 172 172 192
## 2004 192 192 192 172 172 192
## 2005 192 192 192 172 172 192
## 2006 194 194 193 173 173 193
## 2007 194 194 193 173 173 193
## 2008 194 194 194 174 174 194
## 2009 194 194 194 174 174 194
## 2010 194 194 194 174 174 194
## 2011 195 195 195 175 175 195
## 2012 195 195 195 175 175 195
## 2013 195 195 195 173 173 195
## 2014 195 195 195 173 173 195
## 2015 195 195 195 173 173 195
## 2016 195 195 195 173 173 195
## 2017 195 195 195 173 173 195
## 2018 195 195 195 173 173 195
## 2019 195 195 194 171 171 10
##
## $societal
## deaths_all_nsc_rate deaths_civilians_nsc_rate hom_odc_rate hom_odcwho_rate hom_who_rate v2caviol
## 1960 0 0 0 32 32 102
## 1961 0 0 0 36 36 107
## 1962 0 0 0 36 36 113
## 1963 0 0 0 40 40 114
## 1964 0 0 0 42 42 117
## 1965 0 0 0 46 46 121
## 1966 0 0 0 45 45 125
## 1967 0 0 0 49 49 125
## 1968 0 0 0 52 52 128
## 1969 0 0 0 50 50 128
## 1970 0 0 0 48 48 129
## 1971 0 0 0 47 47 133
## 1972 0 0 0 47 47 133
## 1973 0 0 0 48 48 133
## 1974 0 0 0 51 51 134
## 1975 0 0 0 53 53 141
## 1976 0 0 0 50 50 142
## 1977 0 0 0 58 58 143
## 1978 0 0 0 55 55 144
## 1979 0 0 0 53 53 144
## 1980 0 0 0 56 56 145
## 1981 0 0 0 58 58 145
## 1982 0 0 0 52 52 145
## 1983 0 0 0 58 58 145
## 1984 0 0 0 60 60 145
## 1985 0 0 0 60 60 145
## 1986 0 0 0 59 59 145
## 1987 0 0 0 68 68 145
## 1988 0 0 0 60 60 145
## 1989 163 163 0 62 62 145
## 1990 165 165 68 85 63 147
## 1991 182 182 80 99 77 163
## 1992 185 185 87 102 80 165
## 1993 189 189 86 102 80 167
## 1994 191 191 93 107 79 168
## 1995 191 191 95 105 75 168
## 1996 191 191 94 103 62 168
## 1997 191 191 97 103 50 168
## 1998 191 191 97 101 40 168
## 1999 191 191 100 101 30 168
## 2000 191 191 112 114 23 169
## 2001 191 191 119 121 18 169
## 2002 192 192 118 120 16 170
## 2003 192 192 132 132 14 170
## 2004 192 192 139 139 11 170
## 2005 192 192 136 136 8 170
## 2006 194 194 141 141 5 171
## 2007 194 194 143 143 4 171
## 2008 194 194 153 153 20 172
## 2009 194 194 150 151 17 172
## 2010 194 194 154 155 18 172
## 2011 195 195 144 145 18 173
## 2012 195 195 146 147 18 173
## 2013 195 195 127 129 18 167
## 2014 195 195 123 126 18 167
## 2015 195 195 125 130 15 167
## 2016 195 195 119 125 25 167
## 2017 195 195 110 114 21 167
## 2018 195 195 87 90 15 167
## 2019 195 195 0 11 11 168
##
## $conflict
## conflict_internal deaths_all_ext_rate deaths_all_int_rate deaths_civilians_ext_rate deaths_civilians_int_rate
## 1960 113 0 0 0 0
## 1961 117 0 0 0 0
## 1962 124 0 0 0 0
## 1963 125 0 0 0 0
## 1964 128 0 0 0 0
## 1965 132 0 0 0 0
## 1966 136 0 0 0 0
## 1967 137 0 0 0 0
## 1968 141 0 0 0 0
## 1969 141 0 0 0 0
## 1970 143 0 0 0 0
## 1971 148 0 0 0 0
## 1972 148 0 0 0 0
## 1973 149 0 0 0 0
## 1974 151 0 0 0 0
## 1975 158 0 0 0 0
## 1976 158 0 0 0 0
## 1977 159 0 0 0 0
## 1978 162 0 0 0 0
## 1979 165 0 0 0 0
## 1980 166 0 0 0 0
## 1981 168 0 0 0 0
## 1982 168 0 0 0 0
## 1983 169 0 0 0 0
## 1984 169 0 0 0 0
## 1985 169 0 0 0 0
## 1986 171 0 0 0 0
## 1987 171 0 0 0 0
## 1988 171 0 0 0 0
## 1989 171 163 163 163 163
## 1990 171 165 165 165 165
## 1991 186 182 182 182 182
## 1992 188 185 185 185 185
## 1993 190 189 189 189 189
## 1994 192 191 191 191 191
## 1995 192 191 191 191 191
## 1996 192 191 191 191 191
## 1997 192 191 191 191 191
## 1998 192 191 191 191 191
## 1999 192 191 191 191 191
## 2000 192 191 191 191 191
## 2001 192 191 191 191 191
## 2002 193 192 192 192 192
## 2003 193 192 192 192 192
## 2004 193 192 192 192 192
## 2005 193 192 192 192 192
## 2006 194 194 194 194 194
## 2007 194 194 194 194 194
## 2008 195 194 194 194 194
## 2009 195 194 194 194 194
## 2010 195 194 194 194 194
## 2011 196 195 195 195 195
## 2012 196 195 195 195 195
## 2013 196 195 195 195 195
## 2014 196 195 195 195 195
## 2015 196 195 195 195 195
## 2016 196 195 195 195 195
## 2017 196 195 195 195 195
## 2018 196 195 195 195 195
## 2019 196 195 195 195 195
##
## $gender
## gdi_hdr gii_hdr wmn_parl_wdi asfr_adol_wpp fert_adol_wdi fertility_wdi tfr_wpp
## 1960 0 0 0 101 100 100 101
## 1961 0 0 0 0 104 103 0
## 1962 0 0 0 0 110 110 0
## 1963 0 0 0 0 111 110 0
## 1964 0 0 0 0 114 114 0
## 1965 0 0 0 119 118 118 119
## 1966 0 0 0 0 122 122 0
## 1967 0 0 0 0 122 122 0
## 1968 0 0 0 0 125 125 0
## 1969 0 0 0 0 125 125 0
## 1970 0 0 0 128 127 127 128
## 1971 0 0 0 0 132 132 0
## 1972 0 0 0 0 132 132 0
## 1973 0 0 0 0 133 133 0
## 1974 0 0 0 0 135 135 0
## 1975 0 0 0 143 142 142 143
## 1976 0 0 0 0 144 144 0
## 1977 0 0 0 0 145 144 0
## 1978 0 0 0 0 146 145 0
## 1979 0 0 0 0 149 148 0
## 1980 0 0 0 151 150 149 151
## 1981 0 0 0 0 152 151 0
## 1982 0 0 0 0 152 153 0
## 1983 0 0 0 0 152 151 0
## 1984 0 0 0 0 152 151 0
## 1985 0 0 0 153 152 151 153
## 1986 0 0 0 0 153 152 0
## 1987 0 0 0 0 153 156 0
## 1988 0 0 0 0 153 152 0
## 1989 0 0 0 0 153 152 0
## 1990 0 0 0 156 155 154 156
## 1991 0 0 0 0 172 171 0
## 1992 0 0 0 0 175 177 0
## 1993 0 0 0 0 178 177 0
## 1994 0 0 0 0 179 178 0
## 1995 117 121 0 181 179 179 181
## 1996 0 0 0 0 179 178 0
## 1997 0 0 159 0 179 181 0
## 1998 0 0 161 0 179 179 0
## 1999 0 0 152 0 179 180 0
## 2000 145 119 156 181 179 181 181
## 2001 0 0 166 0 179 180 0
## 2002 0 0 160 0 180 183 0
## 2003 0 0 174 0 180 182 0
## 2004 0 0 180 0 180 181 0
## 2005 156 144 184 182 180 183 182
## 2006 0 0 188 0 182 185 0
## 2007 0 0 187 0 182 185 0
## 2008 0 0 186 0 182 185 0
## 2009 0 0 186 0 182 185 0
## 2010 161 150 186 184 182 185 184
## 2011 163 151 186 0 182 185 0
## 2012 163 154 187 0 182 186 0
## 2013 163 150 182 0 182 184 0
## 2014 164 152 185 0 182 184 0
## 2015 164 158 187 185 182 185 185
## 2016 164 162 190 0 182 184 0
## 2017 165 160 188 0 182 184 0
## 2018 165 160 189 185 182 184 185
## 2019 166 162 191 0 182 184 0
##
## $gender_labor
## labor_mod_ratio15_ilo labor_mod_ratio15_wdi labor_nat_ratio15_ilo labor_nat_ratio15_wdi
## 1960 0 0 31 31
## 1961 0 0 20 20
## 1962 0 0 7 7
## 1963 0 0 9 9
## 1964 0 0 6 6
## 1965 0 0 8 8
## 1966 0 0 15 15
## 1967 0 0 3 3
## 1968 0 0 3 3
## 1969 0 0 3 3
## 1970 0 0 34 34
## 1971 0 0 23 23
## 1972 0 0 9 9
## 1973 0 0 10 9
## 1974 0 0 12 12
## 1975 0 0 29 29
## 1976 0 0 27 27
## 1977 0 0 25 26
## 1978 0 0 32 32
## 1979 0 0 40 39
## 1980 0 0 61 60
## 1981 0 0 49 48
## 1982 0 0 39 38
## 1983 0 0 46 44
## 1984 0 0 44 42
## 1985 0 0 56 54
## 1986 0 0 56 54
## 1987 0 0 51 49
## 1988 0 0 55 53
## 1989 0 0 63 61
## 1990 153 150 70 68
## 1991 169 167 83 83
## 1992 172 170 65 64
## 1993 175 173 72 71
## 1994 176 174 77 76
## 1995 176 174 76 75
## 1996 176 174 87 86
## 1997 176 174 85 84
## 1998 176 174 88 87
## 1999 176 174 87 88
## 2000 176 174 99 100
## 2001 176 174 97 97
## 2002 177 175 96 96
## 2003 177 175 99 98
## 2004 177 175 100 100
## 2005 177 175 107 106
## 2006 179 177 106 107
## 2007 179 177 103 102
## 2008 179 177 103 103
## 2009 179 177 112 114
## 2010 179 177 115 116
## 2011 180 177 113 115
## 2012 180 177 118 118
## 2013 180 177 118 118
## 2014 180 177 123 122
## 2015 180 177 114 111
## 2016 180 177 112 114
## 2017 180 177 111 118
## 2018 180 177 98 106
## 2019 180 177 64 107
##
## $gender_educ
## eys_ratio_hdr eys_ratio_uis lit_adult_ratio_uis lit_rate_ratio_wdi mys_ratio_hdr mys_ratio_uis mys_ratio_bl mys_age_ratio_ihme mys_pop_ratio_ihme
## 1960 0 0 0 0 0 0 91 0 0
## 1961 0 0 0 0 0 0 0 0 0
## 1962 0 0 0 0 0 0 0 0 0
## 1963 0 0 0 0 0 0 0 0 0
## 1964 0 0 0 0 0 0 0 0 0
## 1965 0 0 0 0 0 0 108 0 0
## 1966 0 0 0 0 0 0 0 0 0
## 1967 0 0 0 0 0 0 0 0 0
## 1968 0 0 0 0 0 0 0 0 0
## 1969 0 0 0 0 0 0 0 0 0
## 1970 0 29 1 1 0 27 116 130 130
## 1971 0 98 0 0 0 17 0 135 135
## 1972 0 93 1 1 0 5 0 135 135
## 1973 0 91 0 0 0 3 0 136 136
## 1974 0 86 0 0 0 5 0 138 138
## 1975 0 85 6 6 0 13 123 145 145
## 1976 0 94 7 7 0 9 0 147 147
## 1977 0 96 1 1 0 2 0 148 148
## 1978 0 93 2 2 0 2 0 150 150
## 1979 0 94 6 6 0 6 0 153 153
## 1980 0 91 16 16 0 23 124 154 154
## 1981 0 98 16 16 0 21 0 156 156
## 1982 0 95 7 7 0 7 0 156 156
## 1983 0 96 2 2 0 4 0 156 156
## 1984 0 100 7 8 0 4 0 156 156
## 1985 0 96 6 6 0 6 125 156 156
## 1986 0 104 4 4 0 8 0 158 158
## 1987 0 98 3 3 0 2 0 158 158
## 1988 0 99 5 5 0 6 0 158 158
## 1989 0 96 1 1 0 2 0 158 158
## 1990 94 92 15 15 125 19 127 160 160
## 1991 0 95 20 20 0 16 0 177 177
## 1992 0 94 9 9 0 8 0 180 180
## 1993 0 105 4 4 0 2 0 183 183
## 1994 0 98 11 11 0 3 0 184 184
## 1995 145 92 5 5 143 5 142 184 184
## 1996 0 87 9 9 0 5 0 184 184
## 1997 0 71 3 3 0 0 0 184 184
## 1998 0 84 6 6 0 3 0 184 184
## 1999 0 134 8 8 0 4 0 184 184
## 2000 166 131 43 44 154 16 142 184 184
## 2001 0 127 28 29 0 22 0 184 184
## 2002 0 129 19 20 0 16 0 185 185
## 2003 0 124 10 10 0 11 0 185 185
## 2004 0 136 24 25 0 29 0 185 185
## 2005 171 139 20 22 165 33 142 185 185
## 2006 0 134 26 28 0 50 0 187 187
## 2007 0 138 35 36 0 53 0 187 187
## 2008 0 136 32 32 0 62 0 187 187
## 2009 0 133 34 35 0 63 0 187 187
## 2010 178 135 48 51 168 74 143 187 187
## 2011 180 138 57 58 170 76 0 188 188
## 2012 180 131 45 46 170 67 0 188 188
## 2013 180 117 34 34 170 58 0 188 188
## 2014 180 128 52 52 171 68 0 188 188
## 2015 181 131 42 42 171 64 143 188 188
## 2016 181 126 34 35 171 61 0 0 0
## 2017 182 123 32 32 171 48 0 0 0
## 2018 182 64 77 77 171 35 0 0 0
## 2019 182 4 0 1 173 0 0 0 0
##
## $health
## daly_ihme imr_wdi imr_wpp life_exp_wdi life_exp_wpp ufmr_wdi ufmr_wpp physicians_pc_wdi daly_ratio_ihme mmr_unicef
## 1960 0 74 101 99 101 74 101 85 0 0
## 1961 0 78 0 104 0 78 0 18 0 0
## 1962 0 87 0 110 0 87 0 16 0 0
## 1963 0 89 0 110 0 89 0 16 0 0
## 1964 0 93 0 113 0 93 0 16 0 0
## 1965 0 98 119 117 119 98 119 98 0 0
## 1966 0 103 0 122 0 103 0 19 0 0
## 1967 0 104 0 122 0 104 0 17 0 0
## 1968 0 108 0 125 0 108 0 17 0 0
## 1969 0 112 0 125 0 112 0 17 0 0
## 1970 0 114 128 127 128 114 128 112 0 0
## 1971 0 121 0 132 0 121 0 25 0 0
## 1972 0 123 0 132 0 123 0 21 0 0
## 1973 0 124 0 133 0 124 0 22 0 0
## 1974 0 127 0 135 0 127 0 20 0 0
## 1975 0 133 143 142 143 133 143 80 0 0
## 1976 0 135 0 143 0 135 0 25 0 0
## 1977 0 136 0 144 0 136 0 26 0 0
## 1978 0 143 0 145 0 143 0 24 0 0
## 1979 0 146 0 148 0 146 0 34 0 0
## 1980 0 148 151 150 151 148 151 68 0 0
## 1981 0 150 0 152 0 150 0 91 0 0
## 1982 0 152 0 153 0 152 0 42 0 0
## 1983 0 154 0 152 0 154 0 35 0 0
## 1984 0 154 0 152 0 154 0 72 0 0
## 1985 0 159 153 152 153 159 153 57 0 0
## 1986 0 161 0 153 0 161 0 50 0 0
## 1987 0 161 0 156 0 161 0 37 0 0
## 1988 0 161 0 153 0 161 0 45 0 0
## 1989 0 161 0 153 0 161 0 48 0 0
## 1990 160 163 156 155 156 163 156 119 160 0
## 1991 177 180 0 172 0 180 0 89 177 0
## 1992 180 183 0 177 0 183 0 91 180 0
## 1993 183 186 0 178 0 186 0 115 183 0
## 1994 184 188 0 180 0 188 0 88 184 0
## 1995 184 188 181 181 181 188 181 121 184 0
## 1996 184 188 0 180 0 188 0 105 184 0
## 1997 184 188 0 182 0 188 0 111 184 0
## 1998 184 188 0 180 0 188 0 95 184 0
## 1999 184 188 0 181 0 188 0 91 184 0
## 2000 184 188 181 182 181 188 181 107 184 180
## 2001 184 188 0 180 0 188 0 100 184 0
## 2002 185 189 0 183 0 189 0 95 185 0
## 2003 185 189 0 181 0 189 0 88 185 0
## 2004 185 189 0 181 0 189 0 133 185 0
## 2005 185 189 182 182 182 189 182 105 185 181
## 2006 187 191 0 183 0 191 0 104 187 0
## 2007 187 191 0 183 0 191 0 117 187 0
## 2008 187 191 0 184 0 191 0 137 187 0
## 2009 187 191 0 184 0 191 0 135 187 0
## 2010 187 191 184 184 184 191 184 164 187 183
## 2011 188 191 0 184 0 191 0 117 188 0
## 2012 188 191 0 185 0 191 0 115 188 0
## 2013 188 191 0 184 0 191 0 115 188 0
## 2014 188 191 0 184 0 191 0 113 188 0
## 2015 188 191 185 184 185 191 185 105 188 184
## 2016 188 191 0 184 0 191 0 112 188 0
## 2017 188 191 0 184 0 191 0 103 188 184
## 2018 0 191 185 184 185 191 185 61 0 0
## 2019 0 191 0 184 0 191 0 0 0 0
# Draw one pairwise scatterplot matrix per category, titled with the
# category name.
# NOTE(review): aes(), theme_bw(), theme(), element_text() and ggtitle() are
# assumed to come from ggplot2 attached elsewhere in the document.
lapply(names(categories), function(category) {
  GGally::ggpairs(data[, categories[[category]]], mapping = aes(stroke = 0, alpha = 0.25)) +
    theme_bw() +
    theme(strip.text.y.right = element_text(angle = 0)) +
    ggtitle(category)
})
Based on theoretical relevance and representativeness (see within-category correlations), we show correlations of certain of these variables across the categories:
# Selected variables from all categories for the cross-category
# scatterplot matrix.
variables_to_include <- c("labor_mod_ratio15_ilo", "mys_ratio_hdr", "mys_age_ratio_ihme", "tfr_wpp",
                          "asfr_adol_wpp", "imr_wpp", "ufmr_wpp", "daly_ihme", "physicians_pc_wdi",
                          "life_exp_wpp", "pc_rgdpe_pwt", "deaths_all_int_rate", "deaths_all_osv_rate",
                          "latentmean_fariss", "killing_estimate_mean_fariss", "deaths_all_nsc_rate", "hom_odcwho_rate")
# NOTE(review): aes(), theme_bw(), theme() and element_text() are assumed to
# come from ggplot2 attached elsewhere in the document.
GGally::ggpairs(data[, variables_to_include], mapping = aes(stroke = 0, alpha = 0.25)) +
  theme_bw() + theme(strip.text.y.right = element_text(angle = 0))
We noticed that some of the education ratios (female to male) are surprisingly high, and want to know which country-years account for this.
# Female-to-male education ratios to inspect for unexpectedly high values.
variables$ed <- c("mys_ratio_hdr", "mys_ratio_uis", "mys_age_ratio_ihme", "eys_ratio_hdr", "eys_ratio_uis")
summary(data[, variables$ed])
## mys_ratio_hdr mys_ratio_uis mys_age_ratio_ihme eys_ratio_hdr eys_ratio_uis
## Min. :0.104 Min. :0.081 Min. :0.1226 Min. :0.080 Min. :0.052
## 1st Qu.:0.744 1st Qu.:0.878 1st Qu.:0.5306 1st Qu.:0.942 1st Qu.:0.884
## Median :0.937 Median :0.956 Median :0.8374 Median :1.024 Median :0.989
## Mean :0.859 Mean :0.902 Mean :0.7455 Mean :0.989 Mean :0.925
## 3rd Qu.:1.000 3rd Qu.:1.002 3rd Qu.:0.9516 3rd Qu.:1.064 3rd Qu.:1.015
## Max. :1.529 Max. :1.267 Max. :1.2726 Max. :1.317 Max. :1.441
## NA's :8015 NA's :9165 NA's :2538 NA's :7926 NA's :5100
# List every country with at least one year in which mys_ratio_hdr exceeds 1.
data$country[IsTrue(data$mys_ratio_hdr > 1)] %>%
  unique() %>%
  paste(collapse = "; ")
## [1] "Argentina; Armenia; Australia; Bahamas; Barbados; Belize; Brazil; Bulgaria; Canada; Colombia; Costa Rica; Cuba; Denmark; Dominican Republic; Estonia; Fiji; Finland; Gabon; Georgia; Guyana; Honduras; Iceland; Ireland; Israel; Jamaica; Japan; Jordan; Kazakhstan; Kuwait; Kyrgyzstan; Latvia; Lesotho; Libya; Lithuania; Madagascar; Malawi; Malta; Moldova; Mongolia; Myanmar; Namibia; New Zealand; Nicaragua; Norway; Oman; Panama; Paraguay; Philippines; Poland; Portugal; Qatar; Saint Lucia; Saint Vincent & the Grenadines; Suriname; Sweden; Tonga; Trinidad & Tobago; United Arab Emirates; United States of America; Uruguay; Venezuela"
# data[IsTrue(data$mys_ratio_hdr > 1), c("country", "year", variables$ed)]
Since we are dealing with a wide range of measures, we need to ensure that they reflect the same “direction” of effects. We do this by coding the negative of some variables, so that higher values indicate better outcomes for the health and gender categories and higher values indicate worse outcomes for the violence categories.
# Variables whose sign is flipped so that all measures point the same way.
variables$negative <- c("imr_wpp", "ufmr_wpp", "mmr_unicef", "daly_ihme", "asfr_adol_wpp", "tfr_wpp", "gii_hdr",
                        "latentmean_fariss", "v2cltort", "v2clkill", "latentmean_fariss_cumulative1991",
                        "latentmean_fariss_cumulative1996", "v2cltort_cumulative1991",
                        "v2cltort_cumulative1996", "v2clkill_cumulative1991", "v2clkill_cumulative1996")
# Summary of the first nine of them before the sign flip.
summary(data[, c(variables$negative[1:9])])
## imr_wpp ufmr_wpp mmr_unicef daly_ihme asfr_adol_wpp tfr_wpp gii_hdr latentmean_fariss v2cltort
## Min. : 1.254 Min. : 1.765 Min. : 2 Min. : 15334 Min. : 0.283 Min. :1.051 Min. :0.025 Min. :-3.4595 Min. :-3.150
## 1st Qu.: 15.363 1st Qu.: 18.328 1st Qu.: 17 1st Qu.: 27130 1st Qu.: 29.676 1st Qu.:2.170 1st Qu.:0.208 1st Qu.:-0.8403 1st Qu.:-0.879
## Median : 40.161 Median : 50.881 Median : 65 Median : 32776 Median : 69.553 Median :3.782 Median :0.405 Median : 0.1710 Median : 0.446
## Mean : 56.186 Mean : 84.469 Mean : 202 Mean : 43276 Mean : 78.652 Mean :4.065 Mean :0.379 Mean : 0.3545 Mean : 0.422
## 3rd Qu.: 85.913 3rd Qu.:129.686 3rd Qu.: 277 3rd Qu.: 50724 3rd Qu.:117.050 3rd Qu.:5.955 3rd Qu.:0.537 3rd Qu.: 1.4236 3rd Qu.: 1.747
## Max. :319.239 Max. :465.517 Max. :2480 Max. :788016 Max. :242.574 Max. :8.800 Max. :0.819 Max. : 5.3362 Max. : 3.509
## NA's :8259 NA's :8259 NA's :9396 NA's :5145 NA's :8259 NA's :8259 NA's :8365 NA's :274 NA's :975
# Replace each listed column by its negative (unary minus applied per column).
data[, variables$negative] <- lapply(data[, variables$negative], "-")
# Same summary as before the flip, for comparison.
summary(data[, c(variables$negative[1:9])])
## imr_wpp ufmr_wpp mmr_unicef daly_ihme asfr_adol_wpp tfr_wpp gii_hdr latentmean_fariss v2cltort
## Min. :-319.239 Min. :-465.517 Min. :-2480 Min. :-788016 Min. :-242.574 Min. :-8.800 Min. :-0.819 Min. :-5.3362 Min. :-3.509
## 1st Qu.: -85.913 1st Qu.:-129.686 1st Qu.: -277 1st Qu.: -50724 1st Qu.:-117.050 1st Qu.:-5.955 1st Qu.:-0.537 1st Qu.:-1.4236 1st Qu.:-1.747
## Median : -40.161 Median : -50.881 Median : -65 Median : -32776 Median : -69.553 Median :-3.782 Median :-0.405 Median :-0.1710 Median :-0.446
## Mean : -56.186 Mean : -84.469 Mean : -202 Mean : -43276 Mean : -78.652 Mean :-4.065 Mean :-0.379 Mean :-0.3545 Mean :-0.422
## 3rd Qu.: -15.363 3rd Qu.: -18.328 3rd Qu.: -17 3rd Qu.: -27130 3rd Qu.: -29.676 3rd Qu.:-2.170 3rd Qu.:-0.208 3rd Qu.: 0.8403 3rd Qu.: 0.879
## Max. : -1.254 Max. : -1.765 Max. : -2 Max. : -15334 Max. : -0.283 Max. :-1.051 Max. :-0.025 Max. : 3.4595 Max. : 3.150
## NA's :8259 NA's :8259 NA's :9396 NA's :5145 NA's :8259 NA's :8259 NA's :8365 NA's :274 NA's :975
# Sort by country and year and assign each year to a five-year period.
# With right = TRUE the intervals are (1960,1965], (1965,1970], ..., so each
# period ends in a year divisible by five.
# NOTE(review): year 1960 itself falls outside the first interval and gets
# period NA -- confirm this is intended.
data <- data %>%
  dplyr::arrange(country, year) %>%
  mutate(period = cut(year, seq(1960, 2020, 5), right = TRUE))
# Variables for which period averages are computed; the vector is named
# after its own values.
vars <- c(
  "pc_rgdpe_pwt", "growth_rgdpe_pwt", "pop_wpp", "pop_density_wpp",
  "imr_wpp", "ufmr_wpp", "life_exp_wpp", "daly_ihme", "asfr_adol_wpp",
  "mys_age_ratio_ihme", "mys_pop_ratio_ihme", "mys_ratio_hdr",
  "labor_mod_ratio15_ilo", "gii_hdr", "conflict_internal",
  "war_internal", "conflict_non_state", "conflict_one_sided",
  "deaths_all_int", "deaths_civilians_int", "deaths_civilians_osv",
  "deaths_all_nsc", "deaths_civilians_nsc", "deaths_all_ucdp",
  "hom_odcwho", "deaths_all_int_rate", "deaths_civilians_int_rate",
  "deaths_civilians_osv_rate", "deaths_all_nsc_rate",
  "deaths_civilians_nsc_rate", "deaths_all_ucdp_rate", "hom_odcwho_rate",
  "latentmean_fariss", "v2cltort", "v2clkill", "v2caviol",
  "v2x_polyarchy", "v2x_libdem", "v2x_liberal", "v2x_partipdem", "v2x_partip"
)
names(vars) <- vars
# Add a "<variable>_avg" column for every variable in `vars`, holding the
# mean of the non-missing values within each country-period.
# A country-period with no non-missing value yields NaN here.
# NOTE(review): the result stays grouped by country and period.
data <- data %>%
  dplyr::arrange(country, period, year) %>%
  group_by(country, period) %>%
  mutate(across(all_of(vars), ~mean(.x, na.rm = TRUE), .names = "{col}_avg"), .keep = "all")
# mean(..., na.rm = TRUE) of an all-missing group is NaN; recode those to NA.
data[, paste(vars, "avg", sep = "_")] <- lapply(data[, paste(vars, "avg", sep = "_")], function(col) {
  col[is.nan(col)] <- NA
  return(col)
})

# Variables to log-transform, each together with its period average.
take_logs <- c("pc_rgdpe_pwt", "pop_wpp", "pop_density_wpp", "life_exp_wpp")
take_logs <- c(take_logs, paste(take_logs, "avg", sep = "_"))

# Variables to log-transform that are also listed in variables$negative
# (they get a sign-preserving log transform further down).
take_logs_negative <- c("imr_wpp", "ufmr_wpp", "daly_ihme", "asfr_adol_wpp")
take_logs_negative <- c(take_logs_negative, paste(take_logs_negative, "avg", sep = "_"))

summary(data[, sort(c(take_logs, take_logs_negative))])
## asfr_adol_wpp asfr_adol_wpp_avg daly_ihme daly_ihme_avg imr_wpp imr_wpp_avg life_exp_wpp life_exp_wpp_avg pc_rgdpe_pwt pc_rgdpe_pwt_avg pop_density_wpp pop_density_wpp_avg
## Min. :-242.574 Min. :-242.574 Min. :-788016 Min. :-251744 Min. :-319.239 Min. :-319.239 Min. :14.49 Min. :14.49 Min. : 244.6 Min. : 460.6 Min. : 0.615 Min. : 0.615
## 1st Qu.:-117.050 1st Qu.:-114.743 1st Qu.: -50724 1st Qu.: -51072 1st Qu.: -85.913 1st Qu.: -81.977 1st Qu.:55.06 1st Qu.:55.78 1st Qu.: 2486.8 1st Qu.: 2502.9 1st Qu.: 19.004 1st Qu.: 18.845
## Median : -69.553 Median : -68.097 Median : -32776 Median : -32862 Median : -40.161 Median : -38.269 Median :66.91 Median :67.30 Median : 6464.4 Median : 6428.3 Median : 60.446 Median : 60.951
## Mean : -78.652 Mean : -77.720 Mean : -43276 Mean : -43706 Mean : -56.186 Mean : -53.855 Mean :63.73 Mean :64.16 Mean : 13084.9 Mean : 13030.4 Mean : 265.005 Mean : 264.963
## 3rd Qu.: -29.676 3rd Qu.: -29.279 3rd Qu.: -27130 3rd Qu.: -27352 3rd Qu.: -15.363 3rd Qu.: -14.938 3rd Qu.:72.89 3rd Qu.:73.07 3rd Qu.: 16343.7 3rd Qu.: 16156.1 3rd Qu.: 140.414 3rd Qu.: 141.147
## Max. : -0.283 Max. : -0.283 Max. : -15334 Max. : -15340 Max. : -1.254 Max. : -1.254 Max. :84.43 Max. :84.43 Max. :279357.1 Max. :254249.5 Max. :26152.349 Max. :25857.551
## NA's :8259 NA's :773 NA's :5145 NA's :4137 NA's :8259 NA's :773 NA's :8259 NA's :773 NA's :1517 NA's :1445 NA's :301 NA's :299
## pop_wpp pop_wpp_avg ufmr_wpp ufmr_wpp_avg
## Min. : 6.3 Min. : 6.4 Min. :-465.517 Min. :-465.517
## 1st Qu.: 1503.2 1st Qu.: 1497.0 1st Qu.:-129.686 1st Qu.:-121.763
## Median : 6052.4 Median : 6062.8 Median : -50.881 Median : -48.615
## Mean : 30120.1 Mean : 30125.1 Mean : -84.469 Mean : -80.710
## 3rd Qu.: 18647.1 3rd Qu.: 18753.9 3rd Qu.: -18.328 3rd Qu.: -17.785
## Max. :1433783.7 Max. :1424125.7 Max. : -1.765 Max. : -1.765
## NA's :301 NA's :299 NA's :8259 NA's :773
# Natural log of the positive-valued variables, stored as "lg_<variable>".
data[, paste("lg", take_logs, sep = "_")] <- lapply(data[, take_logs], log)
# For the variables in take_logs_negative: undo the sign, take the log,
# then restore the negative sign.
data[, paste("lg", take_logs_negative, sep = "_")] <- lapply(data[, take_logs_negative], function(col) {
  -log(-col)
})
summary(data[, paste("lg", sort(c(take_logs, take_logs_negative)), sep = "_")])
## lg_asfr_adol_wpp lg_asfr_adol_wpp_avg lg_daly_ihme lg_daly_ihme_avg lg_imr_wpp lg_imr_wpp_avg lg_life_exp_wpp lg_life_exp_wpp_avg lg_pc_rgdpe_pwt lg_pc_rgdpe_pwt_avg lg_pop_density_wpp lg_pop_density_wpp_avg
## Min. :-5.491 Min. :-5.491 Min. :-13.577 Min. :-12.436 Min. :-5.766 Min. :-5.7659 Min. :2.673 Min. :2.674 Min. : 5.500 Min. : 6.133 Min. :-0.4861 Min. :-0.4861
## 1st Qu.:-4.763 1st Qu.:-4.743 1st Qu.:-10.834 1st Qu.:-10.841 1st Qu.:-4.453 1st Qu.:-4.4064 1st Qu.:4.008 1st Qu.:4.021 1st Qu.: 7.819 1st Qu.: 7.825 1st Qu.: 2.9447 1st Qu.: 2.9362
## Median :-4.242 Median :-4.221 Median :-10.397 Median :-10.400 Median :-3.693 Median :-3.6446 Median :4.203 Median :4.209 Median : 8.774 Median : 8.768 Median : 4.1018 Median : 4.1101
## Mean :-3.996 Mean :-3.983 Mean :-10.548 Mean :-10.556 Mean :-3.521 Mean :-3.4863 Mean :4.135 Mean :4.143 Mean : 8.773 Mean : 8.772 Mean : 3.9727 Mean : 3.9733
## 3rd Qu.:-3.390 3rd Qu.:-3.377 3rd Qu.:-10.208 3rd Qu.:-10.217 3rd Qu.:-2.732 3rd Qu.:-2.7039 3rd Qu.:4.289 3rd Qu.:4.291 3rd Qu.: 9.702 3rd Qu.: 9.690 3rd Qu.: 4.9446 3rd Qu.: 4.9498
## Max. : 1.262 Max. : 1.262 Max. : -9.638 Max. : -9.638 Max. :-0.226 Max. :-0.2263 Max. :4.436 Max. :4.436 Max. :12.540 Max. :12.446 Max. :10.1717 Max. :10.1604
## NA's :8259 NA's :773 NA's :5145 NA's :4137 NA's :8259 NA's :773 NA's :8259 NA's :773 NA's :1517 NA's :1445 NA's :301 NA's :299
## lg_pop_wpp lg_pop_wpp_avg lg_ufmr_wpp lg_ufmr_wpp_avg
## Min. : 1.847 Min. : 1.864 Min. :-6.143 Min. :-6.1431
## 1st Qu.: 7.315 1st Qu.: 7.311 1st Qu.:-4.865 1st Qu.:-4.8021
## Median : 8.708 Median : 8.710 Median :-3.929 Median :-3.8839
## Mean : 8.430 Mean : 8.431 Mean :-3.818 Mean :-3.7808
## 3rd Qu.: 9.833 3rd Qu.: 9.839 3rd Qu.:-2.908 3rd Qu.:-2.8784
## Max. :14.176 Max. :14.169 Max. :-0.568 Max. :-0.5682
## NA's :301 NA's :299 NA's :8259 NA's :773
# viol_vars <- c("conflict_internal", "war_internal", "conflict_non_state",
# "conflict_one_sided", "latentmean_fariss", "v2cltort", "v2clkill")
# Violence counts to log-transform: the raw counts, their rates, and the
# period averages of both.
viol_vars_log <- c("deaths_all_int", "deaths_civilians_int", "deaths_civilians_osv",
                   "deaths_all_nsc", "deaths_civilians_nsc", "deaths_all_ucdp", "hom_odcwho")
viol_vars_log <- c(viol_vars_log,
                   paste(viol_vars_log, "rate", sep = "_"),
                   paste(viol_vars_log, "avg", sep = "_"),
                   paste(viol_vars_log, "rate_avg", sep = "_"))
# log(x + 1) so that zero counts stay defined.
data[, paste("lg", viol_vars_log, sep = "_")] <- lapply(data[, viol_vars_log], function(col) {
  log(col + 1)
})

#' Residual of an outcome after a linear fit on predictors
#'
#' Fits `lm(y_var ~ x_vars)` on the complete cases of `df` and returns, for
#' each retained row, the observed value minus the fitted value.
#'
#' @param y_var Name of the outcome column (character scalar).
#' @param x_vars Character vector of predictor column names; they enter the
#'   model additively.
#' @param df Data frame containing `country`, `year`, `y_var` and `x_vars`.
#' @return Data frame with columns `country`, `year` and `perf_<y_var>`;
#'   rows with a missing value in any of the used columns are dropped.
CodePerformance <- function(y_var, x_vars, df) {
  x_vars <- paste(x_vars, collapse = " + ")
  equation <- paste(y_var, "~", x_vars)
  # Keep only the model variables plus the identifiers, complete cases only.
  df <- na.omit(get_all_vars(formula = equation, data = df, country = country, year = year))
  mod <- lm(formula = equation, data = df)
  df$predicted <- predict(mod)
  perf_name <- paste("perf", y_var, sep = "_")
  df[, perf_name] <- df[, y_var] - df$predicted
  return(df[, c("country", "year", perf_name)])
}

# From here on `vars` refers to the period-average columns.
names(vars) <- vars <- paste(vars, "avg", sep = "_")
# Years at which performance is evaluated.
names(years) <- years <- c(seq(1965, 2015, 5), 2018)

# For every variable in `vars` and every evaluation year, compute the
# residual from a cross-sectional regression on lg_pc_rgdpe_pwt_avg.
performance_measures <- lapply(vars, function(var) {
  results <- lapply(years, function(year) {
    dat <- data[data$year == year, c("country", "year", "lg_pc_rgdpe_pwt_avg", var)]
    # Skip years without a single complete observation (returns NULL).
    if (nrow(na.omit(dat)) > 0) {
      CodePerformance(y_var = var, x_vars = "lg_pc_rgdpe_pwt_avg", df = dat)
    }
  })
  results <- Filter(Negate(is.null), results)
  do.call(rbind, results)
})
# A variable with no usable year yields NULL; drop it so the merge below
# does not fail on it.
performance_measures <- Filter(Negate(is.null), performance_measures)
# Combine the per-variable results into one country-year table.
performance_measures <- Reduce(
  f = function(...) merge(..., by = c("country", "year"), all = TRUE),
  x = performance_measures
)
data <- merge(data, performance_measures, by = c("country", "year"), all.x = TRUE)
save(data, codebook, categories, variables, file = "_data/LSCMWG_working_data.RData")