# Elementwise "known and TRUE": NA entries become FALSE instead of
# propagating, so the result can be used directly as a row index.
IsTrue <- function(x) {
  known <- !is.na(x)
  known & x
}
# Load the prepared working data (presumably this supplies the `df` data
# frame used below -- TODO confirm against the contents of the .RData file).
load("_data/LSCMWG_working_class.RData")

# Read the "coding" sheet without Google authentication and cache a local
# copy. Cells containing ".." are read as NA.
link <- "https://docs.google.com/spreadsheets/d/1KPUMNCixfyFU4KryzV0_5BYUHCUIyOq2heei9KdpSF8/edit?usp=sharing"
googlesheets4::gs4_deauth()
seq <- googlesheets4::read_sheet(ss = link, sheet = "coding", na = "..")
save(seq, file = "_data/sequences.RData")

# Countries excluded from the analysis further down.
missing_countries <- c(
  "Andorra", "Czechoslovakia", "Dominica", "East Germany", "Ethiopia",
  "Kosovo", "Liechtenstein", "Marshall Islands", "Monaco", "Nauru",
  "North Vietnam", "North Yemen", "Palau", "San Marino",
  "Saint Kitts & Nevis", "South Vietnam", "South Yemen", "Soviet Union",
  "Tuvalu", "West Germany", "Yugoslavia SFR", "South Sudan",
  "Serbia & Montenegro"
)
# table(df[, c("conf_intra", "wars_intra")])
# Summarise internal conflict and violence indicators per country over a
# window of years.
#
# Args:
#   dataset:  Country-year data with columns country, year, conf_intra,
#             wars_intra, latentmean_fariss, v2caviol, v2clkill and v2cltort.
#             Defaults to the global `df` without `missing_countries`.
#   start_yr: First year of the window (inclusive).
#   end_yr:   Last year of the window (inclusive).
#   yrs_long: Number of conflict/war years from which the "(N+ yrs)" labels
#             apply.
#
# Returns:
#   An ungrouped tibble with one row per country that has a row for `end_yr`:
#   `country`, the window means of the four indicators (NA when no value was
#   observed in the window) and the factor `conflict` ("none", "conflict",
#   "conflict (N+ yrs)", "war", "war (N+ yrs)", or NA when the year counts
#   are unknown). Every column except `country` is suffixed with
#   "_<start_yr>_<end_yr>".
ViolenceCats <- function(dataset = df[!(df$country %in% missing_countries), ], start_yr = 1975, end_yr = 2015, yrs_long = 3) {
  indicators <- c("latentmean_fariss", "v2caviol", "v2clkill", "v2cltort")

  # The working copy is deliberately not called `df`: the default for
  # `dataset` is evaluated lazily and has to see the global `df`.
  out <- dataset %>%
    select(country, year, conf_intra, wars_intra, latentmean_fariss, v2caviol, v2clkill, v2cltort) %>%
    filter(year >= start_yr & year <= end_yr) %>%
    arrange(country, year) %>%
    group_by(country) %>%
    # No na.rm on the counts: a missing conf_intra/wars_intra value inside
    # the window leaves the corresponding count NA.
    mutate(conflict_yrs = sum(conf_intra > 0),
           war_yrs = sum(wars_intra > 0),
           latentmean_fariss = mean(latentmean_fariss, na.rm = TRUE),
           v2caviol = mean(v2caviol, na.rm = TRUE),
           v2clkill = mean(v2clkill, na.rm = TRUE),
           v2cltort = mean(v2cltort, na.rm = TRUE)) %>%
    # The window summaries are repeated on every row; the end_yr row is kept
    # as the single per-country record.
    filter(year == end_yr) %>%
    ungroup()

  # Build the label in a pre-sized vector so that no column is indexed before
  # it exists. Later assignments override earlier ones; which() skips NA
  # comparisons, leaving those countries unlabelled.
  conflict <- rep(NA_character_, nrow(out))
  conflict[which(out$conflict_yrs > 0)] <- "conflict"
  conflict[which(out$conflict_yrs > (yrs_long - 1))] <- paste0("conflict (", yrs_long, "+ yrs)")
  conflict[which(out$war_yrs > 0)] <- "war"
  conflict[which(out$war_yrs > (yrs_long - 1))] <- paste0("war (", yrs_long, "+ yrs)")
  conflict[which(out$conflict_yrs == 0 & out$war_yrs == 0)] <- "none"
  out$conflict <- as.factor(conflict)

  out <- out[, c("country", indicators, "conflict")]

  # mean(..., na.rm = TRUE) of an all-missing window is NaN; report it as NA.
  out[, indicators] <- lapply(out[, indicators], function(col) {
    col[is.nan(col)] <- NA
    col
  })

  names(out)[-1] <- paste(names(out)[-1], start_yr, end_yr, sep = "_")
  out
}
# 1961-1970 < seq starts 1975 (1971-1975) <= 1971-2015
# 1981-1990 < seq starts 1995 (1991-1995) <= 1991-2015
# Conflict/violence summaries for the windows 1971-2015 and 1991-2015
# (long conflicts = 5+ years), joined side by side on country.
violence <- lapply(c(1971, 1991), function(window_start) {
  ViolenceCats(start_yr = window_start, end_yr = 2015, yrs_long = 5)
})
violence <- Reduce(
  f = function(lhs, rhs) merge(lhs, rhs, by = "country", all = TRUE),
  x = violence
)
# vars <- names(df)
# vars <- vars[!str_detect(vars, "perf_")]
# lapply(c(variables$health, variables$gender), function(x) {
#   vars[str_detect(vars, x)]
# })
# vars[str_detect(vars, "class") ]
# Keep the post-1970 rows and attach, to every row, the mean of each violence
# indicator within its country-period.
df <- df[df$year > 1970, ] %>%
  arrange(country, year) %>%
  group_by(country, period) %>%
  mutate(
    latentmean_fariss_avg = mean(latentmean_fariss, na.rm = TRUE),
    v2caviol_avg = mean(v2caviol, na.rm = TRUE),
    v2clkill_avg = mean(v2clkill, na.rm = TRUE),
    v2cltort_avg = mean(v2cltort, na.rm = TRUE),
    hom_odcwho_rate_avg = mean(hom_odcwho_rate, na.rm = TRUE),
    deaths_all_int_rate_avg = mean(deaths_all_int_rate, na.rm = TRUE),
    deaths_all_nsc_rate_avg = mean(deaths_all_nsc_rate, na.rm = TRUE),
    deaths_civilians_osv_rate_avg = mean(deaths_civilians_osv_rate, na.rm = TRUE),
    deaths_civilians_int_rate_avg = mean(deaths_civilians_int_rate, na.rm = TRUE),
    deaths_civilians_nsc_rate_avg = mean(deaths_civilians_nsc_rate, na.rm = TRUE)
  )

# Identifiers, classifications and the period-level averages only.
df <- df[, c(
  "country", "year", "period",
  "health_class", "gender_class",
  "class", "class_vv", "class1990", "class_vv1990", "class1995", "class_vv1995",
  "life_exp_wpp_avg", "imr_wpp_avg", "mys_age_ratio_ihme_avg", "asfr_adol_wpp_avg",
  "latentmean_fariss_avg", "v2caviol_avg", "v2clkill_avg", "v2cltort_avg",
  "hom_odcwho_rate_avg",
  "deaths_all_int_rate_avg", "deaths_all_nsc_rate_avg",
  "deaths_civilians_osv_rate_avg", "deaths_civilians_int_rate_avg",
  "deaths_civilians_nsc_rate_avg"
)]

# Characters 7-10 of the period label become the integer period
# (presumably the period's closing year -- TODO confirm the label format).
df$period <- as.integer(str_sub(df$period, start = 7, end = 10))

# One row per country-period: the row whose year equals the period value, on
# the 1975-2015 five-year grid; `year` is then redundant. base::seq is named
# explicitly because `seq` is also the name of the coding sheet object.
df <- df[df$period == df$year & df$period %in% base::seq(from = 1975, to = 2015, by = 5), names(df) != "year"]
# data.frame(class = table(df$period, is.na(df$class))[, 1],
#            life = table(df$period, is.na(df$life_exp_wpp_avg))[, 1],
#            imr = table(df$period, is.na(df$imr_wpp_avg))[, 1],
#            asfr = table(df$period, is.na(df$asfr_adol_wpp_avg))[, 1],
#            mys = table(df$period, is.na(df$mys_age_ratio_ihme_avg))[, 1])
# Classification of each country in the 1975 period.
class1975 <- df[IsTrue(df$period == 1975), c("country", "class", "health_class", "gender_class")]

# Combined class: "low"/"upp" when both scores are below/above 3; anything
# else is "mid", refined to "H>G" or "G>H" when the two scores differ.
class1975$class_vv <- local({
  h <- class1975$health_class
  g <- class1975$gender_class
  both_low <- h < 3 & g < 3
  both_upp <- h > 3 & g > 3
  label <- rep(NA, nrow(class1975))
  label[both_low] <- "low"
  label[both_upp] <- "upp"
  label[!both_low & !both_upp] <- "mid"
  label[label == "mid" & h > g] <- "H>G"
  label[label == "mid" & h < g] <- "G>H"
  label
})

# Suffix everything but the key with "1975" and attach to every period row.
names(class1975)[-1] <- paste0(names(class1975)[-1], "1975")
df <- merge(df, class1975, by = "country", all.x = TRUE)

# Drop rows without a country and the excluded countries.
df <- df[!is.na(df$country), ]
df <- df[!(df$country %in% missing_countries), ]
# Replace the four outcome averages by their z-scores (computed over all
# remaining country-period rows).
df <- local({
  standardised <- df
  for (indicator in c("life_exp_wpp_avg", "imr_wpp_avg", "mys_age_ratio_ihme_avg", "asfr_adol_wpp_avg")) {
    standardised[[indicator]] <- as.numeric(scale(standardised[[indicator]]))
  }
  standardised
})

# Composite scores: the plain mean of the two standardised indicators; a
# missing component makes the composite NA.
df$health <- rowMeans(df[, c("life_exp_wpp_avg", "imr_wpp_avg")], na.rm = FALSE)
df$gender <- rowMeans(df[, c("mys_age_ratio_ihme_avg", "asfr_adol_wpp_avg")], na.rm = FALSE)

# Previous-period value of every indicator and composite, within country.
df <- df %>%
  arrange(country, period) %>%
  group_by(country) %>%
  mutate(
    lag_imr_wpp_avg = lag(imr_wpp_avg),
    lag_asfr_adol_wpp_avg = lag(asfr_adol_wpp_avg),
    lag_life_exp_wpp_avg = lag(life_exp_wpp_avg),
    lag_mys_age_ratio_ihme_avg = lag(mys_age_ratio_ihme_avg),
    lag_health = lag(health),
    lag_gender = lag(gender)
  )
# Reference lines and common axis limits for the health/gender plane.
mean_health <- mean(df$health, na.rm = TRUE)
mean_gender <- mean(df$gender, na.rm = TRUE)
range_health <- range(df$health, na.rm = TRUE)
range_gender <- range(df$gender, na.rm = TRUE)
range_both <- range(range_health, range_gender)

# Period-to-period movement: change on each axis and its Euclidean length.
df$health_chg <- with(df, health - lag_health)
df$gender_chg <- with(df, gender - lag_gender)
df$distance <- with(df, sqrt(health_chg^2 + gender_chg^2))
# df$slope <- (df$health - df$lag_health)/(df$gender - df$lag_gender)
# Per country: total path length, first/last observed period and the number
# of distinct periods.
df <- df %>%
  group_by(country) %>%
  mutate(
    total = sum(distance, na.rm = TRUE),
    first = min(period),
    last = max(period),
    periods = length(unique(period))
  )

# Position at the first and at the last observed period.
first <- df[df$period == df$first, c("country", "first", "periods", "class_vv1975", "class_vv1995", "total", "health", "gender")]
names(first)[names(first) %in% c("health", "gender")] <- c("first_health", "first_gender")
last <- df[df$period == df$last, c("country", "health", "gender")]
names(last) <- c("country", "last_health", "last_gender")

# One row per country: straight-line displacement, its ratio to the total
# path length, and displacement per observed period (overall and per axis).
new <- merge(first, last, by = "country")
new$distance <- with(new, sqrt((last_health - first_health)^2 + (last_gender - first_gender)^2))
new$dist_total <- with(new, distance / total)
new$dist_period <- with(new, distance / periods)
new$health_period <- with(new, (last_health - first_health) / periods)
new$gender_period <- with(new, (last_gender - first_gender) / periods)

# Round everything after the five leading identifier columns.
new[-(1:5)] <- lapply(new[-(1:5)], round, digits = 3)
# Add a "<name>_quart" factor for each summary measure, labelling the
# quartile of the cross-country distribution each value falls in.
vars <- c("dist_total", "dist_period", "health_period", "gender_period")
new[, paste(vars, "quart", sep = "_")] <- lapply(new[, vars], function(col) {
  cuts <- quantile(col, na.rm = TRUE)  # 0%, 25%, 50%, 75%, 100%
  labels <- c("1st quartile", "2nd quartile", "3rd quartile", "4th quartile")
  bin <- rep(NA, length(col))
  # Ascending thresholds: each pass overwrites the label of values that also
  # clear the next cut point. Missing values stay NA.
  for (k in seq_along(labels)) {
    bin[col >= cuts[k]] <- labels[k]
  }
  as.factor(bin)
})
# The endpoint coordinates were only needed to derive the summary measures.
new <- new[, !(names(new) %in% c("first_health", "first_gender", "last_health", "last_gender"))]

# Attach the coded sheet (minus its "sequence" columns) and the violence
# summaries to both the country-level and the country-period data. Full
# outer joins: countries present on only one side are kept.
new <- merge(seq[, !str_detect(names(seq), "sequence")], new, by = "country", all = TRUE)
new <- merge(new, violence, by = "country", all = TRUE)
df <- merge(df, seq[, !str_detect(names(seq), "sequence")], by = "country", all = TRUE)
df <- merge(df, violence, by = "country", all = TRUE)

Countries

# One path plot per country: each segment runs from the previous period's
# (gender, health) position to the current one, coloured by period.
lapply(unique(df$country), function(ctry) {
  # Segments ending after 1975.
  # (Unused optional filter: & !is.na(df$lag_health) & !is.na(df$lag_gender))
  ctry_segments <- df[df$country == ctry & df$period > 1975, ]
  segment_colors <- c("1975" = "gray", "1980" = "red", "1985" = "orange", "1990" = "red", "1995" = "orange",
                      "2000" = "purple", "2005" = "blue", "2010" = "purple", "2015" = "blue")
  typology_note <- paste("\nfrom 1975: ", seq$typology1975[seq$country == ctry],
                         "\nfrom 1995: ", seq$typology1995[seq$country == ctry])

  ggplot(data = ctry_segments,
         mapping = aes(x = lag_gender, y = lag_health, xend = gender, yend = health)) +
    coord_fixed(ratio = 1) +
    # Grey guides: overall means and the health == gender diagonal.
    geom_vline(xintercept = mean_gender, color = "gray") +
    geom_hline(yintercept = mean_health, color = "gray") +
    geom_abline(intercept = 0, slope = 1, color = "gray") +
    geom_segment(lineend = "round", mapping = aes(color = as.factor(period))) +
    scale_color_manual(values = segment_colors) +
    # Green dot: start of the earliest segment; red dot: end of the latest.
    geom_point(aes(x = lag_gender[period == min(period)], y = lag_health[period == min(period)]),
               shape = 20, size = 0.75, color = "green", fill = "green") +
    geom_point(aes(x = gender[period == max(period)], y = health[period == max(period)]),
               shape = 20, size = 0.75, color = "red", fill = "red") +
    xlim(range_both[1], range_both[2]) +
    ylim(range_both[1], range_both[2]) +
    labs(x = "Gender", y = "Health") +
    theme_classic() +
    theme(legend.position = "none") +
    ggtitle(label = ctry, subtitle = typology_note)
})

Classification

1975
# Number of countries per 1975 class, counted on their 2015 rows, plus the
# number without a 1975 class. IsTrue() keeps rows whose period is NA (e.g.
# countries that only came in through the merges) out of the index, so they
# are not counted as "no 1975 classification".
lookup <- table(df$class_vv1975[IsTrue(df$period == 2015)])
lookup["no 1975 classification"] <- sum(is.na(df$class_vv1975[IsTrue(df$period == 2015)]))
# Draw the period-to-period paths of a set of countries in the gender/health
# plane, one arrow per segment, coloured by country.
#
# Args:
#   class: Label for the title; also used to look up the count shown in
#          parentheses from the global `lookup`.
#   dat:   Data with lag_gender, lag_health, gender, health and country.
#   range: Length-2 numeric; limits applied to both axes.
#
# Also reads the globals `mean_gender` and `mean_health` for the grey
# reference lines. Returns the ggplot object.
plotClass <- function(class, dat, range) {
  arrow_head <- arrow(angle = 10, type = "open", length = unit(7, "points"))
  plot_title <- paste0(class, " (", lookup[class], ")")

  ggplot(
    data = dat,
    mapping = aes(x = lag_gender, y = lag_health, xend = gender, yend = health, group = country, color = country)
  ) +
    coord_fixed(ratio = 1) +
    geom_segment(lineend = "round", arrow = arrow_head) +
    geom_vline(xintercept = mean_gender, color = "gray") +
    geom_hline(yintercept = mean_health, color = "gray") +
    labs(x = "Gender", y = "Health") +
    xlim(range[1], range[2]) +
    ylim(range[1], range[2]) +
    theme_classic() +
    theme(legend.position = "none") +
    ggtitle(plot_title)
}
# Paths after 1975, one plot per 1975 class. IsTrue() turns NA comparisons
# (countries without a 1975 class, rows without a period) into FALSE; a bare
# `==` would instead insert an all-NA row for each of them.
lapply(c("low", "H>G", "G>H", "mid", "upp"), function(class) {
  plotClass(class = class,
            dat = df[IsTrue(df$class_vv1975 == class & df$period > 1975), ],
            range = range_both)
})
# Countries that have no 1975 class.
plotClass(class = "no 1975 classification",
          dat = df[IsTrue(is.na(df$class_vv1975) & df$period > 1975), ],
          range = range_both)

1975 (from origin)
# Paths after 1975 re-expressed relative to each country's starting point:
# the lagged position of its earliest retained period is moved to (0, 0).
# IsTrue() drops rows whose period is NA instead of turning them into
# all-NA rows.
new1975 <- df[IsTrue(df$period > 1975), ] %>%
  arrange(country, period) %>%
  group_by(country) %>%
  mutate(first = min(period),
         health_first = lag_health[period == first],
         gender_first = lag_gender[period == first]) %>%
  mutate(health = health - health_first,
         lag_health = lag_health - health_first,
         gender = gender - gender_first,
         lag_gender = lag_gender - gender_first)
# new1975 %>% select(country, period, first, health, lag_health, health_first, gender, lag_gender, gender_first) %>% print(n = 20)

# Common axis limits. The lagged coordinates are included so that segment
# starts (in particular the shared origin) can never fall outside the limits.
range_both_new <- range(c(new1975$gender, new1975$health, new1975$lag_gender, new1975$lag_health), na.rm = TRUE)
lapply(c("low", "H>G", "G>H", "mid", "upp"), function(class) {
  plotClass(class = class,
            dat = new1975[IsTrue(new1975$class_vv1975 == class), ],
            range = range_both_new)
})

1995
# Number of countries per 1995 class, counted on their 2015 rows, plus the
# number without a 1995 class. IsTrue() keeps rows whose period is NA out of
# the counts and out of the plotted subsets (a bare comparison would add an
# all-NA row for every NA).
lookup <- table(df$class_vv1995[IsTrue(df$period == 2015)])
lookup["no 1995 classification"] <- sum(is.na(df$class_vv1995[IsTrue(df$period == 2015)]))
lapply(c("low", "H>G", "G>H", "mid", "upp"), function(class) {
  plotClass(class = class,
            dat = df[IsTrue(df$class_vv1995 == class & df$period > 1995), ],
            range = range_both)
})
# Countries that have no 1995 class.
plotClass(class = "no 1995 classification",
          dat = df[IsTrue(is.na(df$class_vv1995) & df$period > 1995), ],
          range = range_both)

1995 (from origin)
# Paths after 1995 re-expressed relative to each country's starting point:
# the lagged position of its earliest retained period is moved to (0, 0).
# IsTrue() drops rows whose period is NA instead of turning them into
# all-NA rows.
new1995 <- df[IsTrue(df$period > 1995), ] %>%
  arrange(country, period) %>%
  group_by(country) %>%
  mutate(first = min(period))
# new1995$first[new1995$first < 1995] <- 1995
new1995 <- new1995 %>%
  mutate(health_first = lag_health[period == first],
         gender_first = lag_gender[period == first]) %>%
  mutate(health = health - health_first,
         lag_health = lag_health - health_first,
         gender = gender - gender_first,
         lag_gender = lag_gender - gender_first)
# new1995 %>% select(country, period, first, health, lag_health, health_first, gender, lag_gender, gender_first) %>% print(n = 100)
range_gender_new <- range(new1995$gender, na.rm = TRUE)
range_health_new <- range(new1995$health, na.rm = TRUE)
# Common axis limits. The lagged coordinates are included so that segment
# starts (in particular the shared origin) can never fall outside the limits.
range_both_new <- range(c(range_gender_new, range_health_new, new1995$lag_gender, new1995$lag_health), na.rm = TRUE)
lapply(c("low", "H>G", "G>H", "mid", "upp"), function(class) {
  plotClass(class = class,
            dat = new1995[IsTrue(new1995$class_vv1995 == class), ],
            range = range_both_new)
})

Typology

from 1975
# Path plot for the countries in one category (typology, conflict level, ...).
#
# Args:
#   cat:   Category label for the title; also the key used to look up the
#          count shown in parentheses from the global `lookup`.
#   dat:   Data with lag_gender, lag_health, gender, health and country.
#   range: Length-2 numeric; limits applied to both axes.
#
# The plot is exactly the one plotClass() draws, so this delegates to it
# rather than keeping a second copy of the same ggplot code.
plotCategory <- function(cat, dat, range) {
  plotClass(class = cat, dat = dat, range = range)
}
# Paths after 1975, one plot per 1975 typology; titles show the number of
# countries coded with that typology. IsTrue() turns NA comparisons
# (countries without a typology, rows without a period) into FALSE; a bare
# `==` would insert an all-NA row for each of them.
lookup <- table(seq$typology1975)
lapply(sort(unique(df$typology1975)), function(sequ) {
  plotCategory(cat = sequ,
               dat = df[IsTrue(df$typology1975 == sequ & df$period > 1975), ],
               range = range_both)
})

from 1995
# Paths after 1995, one plot per 1995 typology; titles show the number of
# countries coded with that typology. IsTrue() turns NA comparisons
# (countries without a typology, rows without a period) into FALSE; a bare
# `==` would insert an all-NA row for each of them.
lookup <- table(seq$typology1995)
lapply(sort(unique(df$typology1995)), function(sequ) {
  plotCategory(cat = sequ,
               dat = df[IsTrue(df$typology1995 == sequ & df$period > 1995), ],
               range = range_both)
})

Data
# Typology and class of every country.
new[c("country", "typology1975", "typology1995", "class_vv1975", "class_vv1995")]

Health & gender

# Per-country change measures, followed by their distribution.
new[c("country", "first", "periods", "total", "distance", "dist_total", "dist_period", "health_period", "gender_period")]
summary(new[c("first", "total", "distance", "dist_total", "dist_period", "health_period", "gender_period")])
##      first          total           distance        dist_total      dist_period      health_period    gender_period     
##  Min.   :1975   Min.   :0.0970   Min.   :0.0970   Min.   :0.1630   Min.   :0.03900   Min.   :0.0280   Min.   :-0.03700  
##  1st Qu.:1975   1st Qu.:0.8518   1st Qu.:0.7622   1st Qu.:0.8985   1st Qu.:0.09775   1st Qu.:0.0710   1st Qu.: 0.05325  
##  Median :1975   Median :1.3315   Median :1.2145   Median :0.9510   Median :0.14350   Median :0.1120   Median : 0.07450  
##  Mean   :1979   Mean   :1.5302   Mean   :1.3198   Mean   :0.9029   Mean   :0.15708   Mean   :0.1289   Mean   : 0.08087  
##  3rd Qu.:1975   3rd Qu.:2.0330   3rd Qu.:1.7372   3rd Qu.:0.9778   3rd Qu.:0.19575   3rd Qu.:0.1800   3rd Qu.: 0.10300  
##  Max.   :2010   Max.   :9.3300   Max.   :3.8330   Max.   :1.0000   Max.   :0.42600   Max.   :0.3510   Max.   : 0.24200

Internal conflict

# Paths after 1975, one plot per 1971-2015 internal-conflict category;
# titles show the number of countries in the category. IsTrue() turns NA
# comparisons (countries without a conflict category, rows without a period)
# into FALSE; a bare `==` would insert an all-NA row for each of them.
lookup <- table(new$conflict_1971_2015)
lapply(c("none", "conflict", "conflict (5+ yrs)", "war", "war (5+ yrs)"), function(cat) {
  plotCategory(cat = cat,
               dat = df[IsTrue(df$conflict_1971_2015 == cat & df$period > 1975), ],
               range = range_both)
})

Bivariate distributions

Health change
# Summary for stat_summary(fun.data = ...): the mean as the point and the
# 25th/75th percentiles as the ends of the range.
# Returns a named numeric vector c(y, ymin, ymax).
data_summary <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), names = FALSE)
  c(y = mean(x), ymin = quartiles[1], ymax = quartiles[2])
}
# Mean (point) and interquartile range (bar) of the per-period health change
# by 1975 typology. expand_limits() keeps 0 on the axis without discarding
# data; xlim(0, NA) sets scale limits, so any negative change would be
# removed before stat_summary() computed the mean and quartiles.
ggplot(data = new, mapping = aes(x = health_period, y = typology1975)) +
  stat_summary(fun.data = data_summary, geom = "pointrange") +
  expand_limits(x = 0) + theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "health_period_quart")]), margin = 1) * 100, margin = 2), 1)

Gender change
# Mean (point) and interquartile range (bar) of the per-period gender change
# by 1975 typology. expand_limits() keeps 0 on the axis without discarding
# data; xlim(0, NA) sets scale limits, so countries with a negative change
# would be removed before stat_summary() computed the mean and quartiles.
ggplot(data = new, mapping = aes(x = gender_period, y = typology1975)) +
  stat_summary(fun.data = data_summary, geom = "pointrange") +
  expand_limits(x = 0) + theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "gender_period_quart")]), margin = 1) * 100, margin = 2), 1)

Overall change
# Mean (point) and interquartile range (bar) of the per-period displacement
# by 1975 typology, with the x axis starting at 0.
ggplot(new, aes(x = dist_period, y = typology1975)) +
  stat_summary(geom = "pointrange", fun.data = data_summary) +
  scale_x_continuous(limits = c(0, NA)) +
  theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "dist_period_quart")]), margin = 1) * 100, margin = 2), 1)

Path efficiency
# Mean (point) and interquartile range (bar) of path efficiency (net
# displacement divided by total path length) by 1975 typology, with the
# x axis starting at 0.
ggplot(new, aes(x = dist_total, y = typology1975)) +
  stat_summary(geom = "pointrange", fun.data = data_summary) +
  scale_x_continuous(limits = c(0, NA)) +
  theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "dist_total_quart")]), margin = 1) * 100, margin = 2), 1)

Internal conflict
# Order the conflict categories from least to most severe, then count
# countries per 1975 typology and category (dodged bars of equal width).
new$conflict_1971_2015 <- factor(
  new$conflict_1971_2015,
  levels = c("none", "conflict", "conflict (5+ yrs)", "war", "war (5+ yrs)")
)
ggplot(new, aes(y = typology1975, fill = conflict_1971_2015)) +
  geom_bar(stat = "count", position = position_dodge2(preserve = "single")) +
  scale_fill_discrete(name = "", limits = levels(new$conflict_1971_2015)) +
  theme_minimal() +
  theme(legend.position = "top")

# round(addmargins(prop.table(table(new[, c("typology1975", "conflict_1971_2015")])[, c(3, 1:2, 4:5)], margin = 1) * 100, margin = 2), 1)
LPI
# Mean (point) and interquartile range (bar) of the 1971-2015 average of
# latentmean_fariss by 1975 typology. expand_limits() keeps 0 on the axis
# without discarding data; xlim(0, NA) sets scale limits, so countries with
# a negative average would be removed before stat_summary() computed the
# mean and quartiles.
ggplot(data = new, mapping = aes(x = latentmean_fariss_1971_2015, y = typology1975)) +
  stat_summary(fun.data = data_summary, geom = "pointrange") +
  expand_limits(x = 0) + theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "latentmean_fariss_1971_2015")]), margin = 1) * 100, margin = 2), 1)
# addmargins(table(new[, c("typology1975", "latentmean_fariss_1971_2015")]))

State torture
# Mean (point) and interquartile range (bar) of the 1971-2015 average of
# v2cltort by 1975 typology. expand_limits() keeps 0 on the axis without
# discarding data; xlim(0, NA) sets scale limits, so countries with a
# negative average would be removed before stat_summary() computed the mean
# and quartiles.
ggplot(data = new, mapping = aes(x = v2cltort_1971_2015, y = typology1975)) +
  stat_summary(fun.data = data_summary, geom = "pointrange") +
  expand_limits(x = 0) + theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "v2cltort_1971_2015")]), margin = 1) * 100, margin = 2), 1)
# addmargins(table(new[, c("typology1975", "v2cltort_1971_2015")]))

Societal violence
# Mean (point) and interquartile range (bar) of the 1971-2015 average of
# v2caviol by 1975 typology. expand_limits() keeps 0 on the axis without
# discarding data; xlim(0, NA) sets scale limits, so countries with a
# negative average would be removed before stat_summary() computed the mean
# and quartiles.
ggplot(data = new, mapping = aes(x = v2caviol_1971_2015, y = typology1975)) +
  stat_summary(fun.data = data_summary, geom = "pointrange") +
  expand_limits(x = 0) + theme_minimal()
# round(addmargins(prop.table(table(new[, c("typology1975", "v2caviol_1971_2015")]), margin = 1) * 100, margin = 2), 1)
# addmargins(table(new[, c("typology1975", "v2caviol_1971_2015")]))