Library

# Turn off scientific notation
options(scipen=999)

# Load packages
library(here)        # relative file paths for reproducibility
library(tidyverse)   # data wrangling
library(stringi)     # string data wrangling
library(tigris)      # US census TIGER/Line shapefiles
library(ggplot2)     # data visualization
library(cowplot)     # data visualization plotting
library(gridExtra)   # grid for data visualizations
library(biscale)     # bivariate mapping
library(kableExtra)  # table formatting
library(scales)      # palette and number formatting
library(cluster)     # clustering algorithms
library(factoextra)  # clustering algorithms & visualization

Functions

import::here( "fips_census_regions",
              "load_svi_data",
              "merge_svi_data",
              "census_division",
              "flag_summarize",
              "summarize_county_nmtc",
              "summarize_county_lihtc",
              "elbow_plot",
             # notice the use of here::here() that points to the .R file
             # where all these R objects are created
             .from = here::here("analysis/project_data_steps_radovich.R"),
             .character_only = TRUE)

census_division

SVI Data

# Load SVI data sets
svi_2010 <- readRDS(here::here("data/raw/Census_Data_SVI/svi_2010_trt10.rds"))
svi_2020 <- readRDS(here::here("data/raw/Census_Data_SVI/svi_2020_trt10.rds"))

# Load mapping data sets
svi_county_map2010 <- readRDS(here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_county_svi_flags10.rds")))

svi_county_map2020 <- readRDS(here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_county_svi_flags20.rds")))

divisional_st_sf <- readRDS(here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_st_sf.rds")))

NMTC & LIHTC Tract Eligibility Data

# Load NMTC & LIHTC Tract Eligibility Data

orig_nmtc <- readxl::read_excel(here::here("data/raw/NMTC_LIHTC_tracts/nmtc_2011-2015_lic_110217.xlsx"), sheet="NMTC LICs 2011-2015 ACS")

high_migration_nmtc <- readxl::read_excel(here::here("data/raw/NMTC_LIHTC_tracts/nmtc_2011-2015_lic_110217.xlsx"), sheet="High migration tracts", skip=1)

nmtc_awards_data <- readxl::read_excel(here::here("data/raw/NMTC_LIHTC_tracts/NMTC_Public_Data_Release_includes_FY_2021_Data_final.xlsx"), sheet = "Projects 2 - Data Set PUBLISH.P")

lihtc_eligible <- readxl::read_excel(here::here("data/raw/NMTC_LIHTC_tracts/qct_data_2010_2011_2012.xlsx"))

lihtc_projects <- read.csv(here::here("data/raw/NMTC_LIHTC_tracts/lihtcpub/LIHTCPUB.csv"))

Load 2010 SVI Data

# National 2010 Data
svi_2010_national <- load_svi_data(svi_2010, percentile=.75)
svi_2010_national %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Divisional 2010 Data
svi_2010_divisional <- load_svi_data(svi_2010, rank_by = "divisional", location = census_division, percentile=.75)
svi_2010_divisional %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")

Load 2020 SVI Data

# National 2020 Data
svi_2020_national <- load_svi_data(svi_2020, percentile=.75)
svi_2020_national %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Divisional 2020 Data
svi_2020_divisional <- load_svi_data(svi_2020, rank_by = "divisional", location =  census_division, percentile=.75)
svi_2020_divisional %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")

Merge 2010 and 2020 Data

# Find tracts with divisional data in both 2010 and 2020
svi_divisional <- merge_svi_data(svi_2010_divisional, svi_2020_divisional)
svi_divisional %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Find tracts with divisional data in both 2010 and 2020
svi_national <- merge_svi_data(svi_2010_national, svi_2020_national)
svi_national %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")

NMTC Data Wrangling

# Rename Columns
orig_nmtc_df <- orig_nmtc %>% 
  rename("GEOID10" = "2010 Census Tract Number FIPS code. GEOID",
         "nmtc_eligibility_orig" = "Does Census Tract Qualify For NMTC Low-Income Community (LIC) on Poverty or Income Criteria?")

orig_nmtc_df %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Rename GEOID Column
high_migration_nmtc_df <- high_migration_nmtc %>% rename("GEOID10" = "2010 Census Tract Number FIPS code GEOID")

high_migration_nmtc_df %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Add column to label tracts as high migration
high_migration_nmtc_df <- high_migration_nmtc_df %>% mutate(high_migration = "Yes")

# Join to original column
orig_nmtc_df <- left_join(orig_nmtc_df, high_migration_nmtc_df, join_by(GEOID10 == GEOID10))

# Update eligibility column with coalesce()
nmtc_df <- orig_nmtc_df %>% 
  mutate(nmtc_eligibility = coalesce(high_migration, nmtc_eligibility_orig))

nmtc_df %>% filter(GEOID10 == "01087231601") %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Create data set for NMTC eligible tracts
nmtc_eligible <- nmtc_df %>% 
  select(GEOID10, nmtc_eligibility, `County Code`, `County Name`, `State Abbreviation`, `State Name`) %>% 
  filter(tolower(nmtc_eligibility) == "yes")

nmtc_eligible %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Save just tract ID and eligibility
nmtc_eligible_df <- nmtc_eligible %>% select(GEOID10, nmtc_eligibility)
nmtc_eligible_df %>% head()
# Add leading zeroes
nmtc_awards <- nmtc_awards_data %>% 
  mutate(`2010 Census Tract` = str_pad(`2010 Census Tract`, 11, "left", pad=0)) %>%
  rename("GEOID10" =`2010 Census Tract`)

nmtc_awards %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Create character zip_code column:
nmtc_awards <- nmtc_awards %>% 
  mutate(zip_code = str_pad(`Zip Code`, 5, "left", pad=0))

nmtc_awards %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# View years
nmtc_awards_data %>% 
  filter(`Origination Year` <= 2010)  %>% 
  select(`Origination Year`) %>% 
  unique()
# View tracts
nmtc_awards_pre2010 <- nmtc_awards %>% 
  filter(`Origination Year` <= 2010) %>% 
  count(GEOID10) %>% 
  rename("pre10_nmtc_project_cnt" = "n")

nmtc_awards_dollars_pre2010 <- nmtc_awards %>% 
  filter(`Origination Year` <= 2010) %>% 
  group_by(GEOID10) %>% 
  summarise(pre10_nmtc_dollars = sum(`Project QLICI Amount`, na.rm = TRUE))

nmtc_awards_pre2010 <- left_join(nmtc_awards_pre2010, 
                                 nmtc_awards_dollars_pre2010, 
                                 join_by(GEOID10 == GEOID10))

nmtc_awards_pre2010$pre10_nmtc_dollars_formatted <- scales::dollar_format()(nmtc_awards_pre2010$pre10_nmtc_dollars)

nmtc_awards_pre2010 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# View years
nmtc_awards_data %>% 
  filter(`Origination Year` > 2010)  %>% 
  select(`Origination Year`) %>% 
  unique()
nmtc_awards_data %>% 
  filter(`Origination Year` > 2010 & `Origination Year` <= 2020) %>% 
  select(State) %>% 
  filter(!State %in% c("American Samoa", "Guam", "Puerto Rico")) %>% 
  unique() %>% 
  mutate(n = row_number()) %>% 
  kbl() %>% kable_styling() %>% scroll_box(width = "100%")
nmtc_awards_post2010 <- nmtc_awards %>% 
  filter(`Origination Year` > 2010 & `Origination Year` <= 2020) %>% 
  count(GEOID10) %>% 
  rename("post10_nmtc_project_cnt" = "n")

nmtc_awards_dollars_post2010 <- nmtc_awards %>% 
  filter(`Origination Year` > 2010 & `Origination Year` <= 2020) %>% 
  group_by(GEOID10) %>% 
  summarise(post10_nmtc_dollars = sum(`Project QLICI Amount`, na.rm = TRUE))

nmtc_awards_post2010 <- left_join(nmtc_awards_post2010, 
                                  nmtc_awards_dollars_post2010, 
                                  join_by(GEOID10 == GEOID10))

nmtc_awards_post2010$post10_nmtc_dollars_formatted <- scales::dollar_format()(nmtc_awards_post2010$post10_nmtc_dollars)

nmtc_awards_post2010 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Divisional data
svi_divisional_nmtc_eligible <- left_join(svi_divisional, nmtc_eligible_df, join_by("GEOID_2010_trt" == "GEOID10")) %>% filter(tolower(nmtc_eligibility) == "yes")

svi_divisional_nmtc_eligible %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# National data
svi_national_nmtc_eligible <- left_join(svi_national, nmtc_eligible_df, join_by("GEOID_2010_trt" == "GEOID10")) %>% filter(tolower(nmtc_eligibility) == "yes")

svi_national_nmtc_eligible %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join divisional data to nmtc_awards_pre2010, set count to 0 if no data
svi_divisional_nmtc_eligible <- 
  left_join(svi_divisional_nmtc_eligible, nmtc_awards_pre2010, join_by("GEOID_2010_trt" == "GEOID10")) %>%
  mutate(pre10_nmtc_project_cnt = if_else(is.na(pre10_nmtc_project_cnt), 0, pre10_nmtc_project_cnt)) %>%
    mutate(pre10_nmtc_dollars = if_else(is.na(pre10_nmtc_dollars), 0, pre10_nmtc_dollars)) %>%
    mutate(pre10_nmtc_dollars_formatted = if_else(is.na(pre10_nmtc_dollars_formatted), "$0", pre10_nmtc_dollars_formatted))

# View table
svi_divisional_nmtc_eligible %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join national data to nmtc_awards_pre2010, set count to 0 if no data
svi_national_nmtc_eligible <- 
  left_join(svi_national_nmtc_eligible, nmtc_awards_pre2010, join_by("GEOID_2010_trt" == "GEOID10")) %>%
  mutate(pre10_nmtc_project_cnt = if_else(is.na(pre10_nmtc_project_cnt), 0, pre10_nmtc_project_cnt)) %>%
    mutate(pre10_nmtc_dollars = if_else(is.na(pre10_nmtc_dollars), 0, pre10_nmtc_dollars))%>%
    mutate(pre10_nmtc_dollars_formatted = if_else(is.na(pre10_nmtc_dollars_formatted), "$0", pre10_nmtc_dollars_formatted))

# View table
svi_national_nmtc_eligible %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Find count of NMTC projects after 2010
# Remove all tracts that do not have SVI flag counts for 2010
# Remove all tracts that do not have SVI flag counts for 2020
# Remove all tracts that had an NMTC project before 2010
svi_divisional_nmtc <- 
  left_join(svi_divisional_nmtc_eligible, nmtc_awards_post2010, join_by("GEOID_2010_trt" == "GEOID10")) %>%
  mutate(post10_nmtc_project_cnt = if_else(is.na(post10_nmtc_project_cnt), 0, post10_nmtc_project_cnt)) %>%
  mutate(post10_nmtc_dollars = if_else(is.na(post10_nmtc_dollars), 0, post10_nmtc_dollars))%>%
  mutate(post10_nmtc_dollars_formatted = if_else(is.na(post10_nmtc_dollars_formatted), "$0", post10_nmtc_dollars_formatted)) %>%
  mutate(nmtc_flag = if_else(post10_nmtc_project_cnt > 0, 1, 0)) %>% 
  filter(!is.na(F_TOTAL_10)) %>% 
  filter(!is.na(F_TOTAL_20)) %>% 
  filter(pre10_nmtc_project_cnt < 1)

svi_divisional_nmtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Find count of NMTC projects after 2010
# Remove all tracts that do not have SVI flag counts for 2010
# Remove all tracts that do not have SVI flag counts for 2020
# Remove all tracts that had an NMTC project before 2010
svi_national_nmtc <- 
  left_join(svi_national_nmtc_eligible, nmtc_awards_post2010, join_by("GEOID_2010_trt" == "GEOID10")) %>%
  mutate(post10_nmtc_project_cnt = if_else(is.na(post10_nmtc_project_cnt), 0, post10_nmtc_project_cnt)) %>%
  mutate(post10_nmtc_dollars = if_else(is.na(post10_nmtc_dollars), 0, post10_nmtc_dollars))%>%
  mutate(post10_nmtc_dollars_formatted = if_else(is.na(post10_nmtc_dollars_formatted), "$0", post10_nmtc_dollars_formatted)) %>%
  mutate(nmtc_flag = if_else(post10_nmtc_project_cnt > 0, 1, 0)) %>% 
  filter(!is.na(F_TOTAL_10)) %>% 
  filter(!is.na(F_TOTAL_20)) %>% 
  filter(pre10_nmtc_project_cnt < 1)

svi_national_nmtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# See all the states in data set with eligible tracts
svi_national_nmtc %>% 
  select(state) %>% 
  arrange(state) %>% 
  unique() %>% 
  mutate(n = row_number()) %>% 
  kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# See all the states in data set with projects
svi_national_nmtc %>% 
  filter(nmtc_flag == 1) %>% 
  select(state) %>% 
  arrange(state) %>% 
  unique() %>% 
  mutate(n = row_number()) %>% 
  kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_national_nmtc_county_sum <- summarize_county_nmtc(svi_national_nmtc)

svi_national_nmtc_county_sum %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_divisional_nmtc_county_sum <- summarize_county_nmtc(svi_divisional_nmtc)
svi_divisional_nmtc_county_sum %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Create data frame of NMTC eligible tracts 2010 nationally
svi_national_nmtc10 <- svi_national_nmtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_10, E_TOTPOP_10) %>% rename("F_TOTAL" = "F_TOTAL_10")

# Count national-level SVI flags for 2010, create unified fips column
svi_2010_national_county_flags_nmtc <- flag_summarize(svi_national_nmtc10, "E_TOTPOP_10") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Add suffix to flag columns 2010
colnames(svi_2010_national_county_flags_nmtc)[11:15] <- paste0(colnames(svi_2010_national_county_flags_nmtc)[11:15], 10)

# Create data frame of NMTC eligible tracts 2020 nationally
svi_national_nmtc20 <- svi_national_nmtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_20, E_TOTPOP_20) %>% rename("F_TOTAL" = "F_TOTAL_20")

# Count national-level SVI flags for 2020, create unified fips column
svi_2020_national_county_flags_nmtc <- flag_summarize(svi_national_nmtc20, "E_TOTPOP_20") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Identify needed columns for 2020, add suffix
colnames(svi_2020_national_county_flags_nmtc)[11:15] <- paste0(colnames(svi_2020_national_county_flags_nmtc)[11:15], "20")

# Filter to needed columns for 2020 to avoid duplicate column column names
svi_2020_national_county_flags_join_nmtc <- svi_2020_national_county_flags_nmtc %>% ungroup() %>% select("fips_county_st", all_of(colnames(svi_2020_national_county_flags_nmtc)[11:15]))
 
# Join 2010 and 2020 data
svi_national_county_flags_nmtc <- left_join(svi_2010_national_county_flags_nmtc, svi_2020_national_county_flags_join_nmtc, join_by("fips_county_st" == "fips_county_st")) 

svi_national_county_flags_nmtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join flags with NMTC county project data
svi_national_county_nmtc <- left_join(svi_national_nmtc_county_sum,
                                      svi_national_county_flags_nmtc,
                                    join_by("State" == "state", "County" == "county",
                                            "Division" == "division"))

svi_national_county_nmtc$post10_nmtc_project_cnt[is.na(svi_national_county_nmtc$post10_nmtc_project_cnt)] <- 0

svi_national_county_nmtc$county_name <- paste0(svi_national_county_nmtc$County, ", ", svi_national_county_nmtc$State)

svi_national_county_nmtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Create data frame of NMTC eligible tracts 2020 nationally
svi_divisional_nmtc10 <- svi_divisional_nmtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_10, E_TOTPOP_10) %>% rename("F_TOTAL" = "F_TOTAL_10")

# Count divisional-level SVI flags for 2010, create unified fips column
svi_2010_divisional_county_flags_nmtc <- flag_summarize(svi_divisional_nmtc10, "E_TOTPOP_10") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Add suffix to flag columns 2010
colnames(svi_2010_divisional_county_flags_nmtc)[11:15] <- paste0(colnames(svi_2010_divisional_county_flags_nmtc)[11:15], "10")

# Create data frame of NMTC eligible tracts 2020 nationally
svi_divisional_nmtc20 <- svi_divisional_nmtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_20, E_TOTPOP_20) %>% rename("F_TOTAL" = "F_TOTAL_20")

# Count divisional-level SVI flags for 2020, create unified fips column
svi_2020_divisional_county_flags_nmtc <- flag_summarize(svi_divisional_nmtc20, "E_TOTPOP_20") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Identify needed columns for 2020
colnames(svi_2020_divisional_county_flags_nmtc)[11:15] <- paste0(colnames(svi_2020_divisional_county_flags_nmtc)[11:15], "20")

# Filter to needed columns for 2020 to avoid duplicate column column names
svi_2020_divisional_county_flags_join_nmtc <- svi_2020_divisional_county_flags_nmtc %>% ungroup() %>% select("fips_county_st", all_of(colnames(svi_2020_divisional_county_flags_nmtc)[11:15]))
 
# Join 2010 and 2020 data
svi_divisional_county_flags_nmtc <- left_join(svi_2010_divisional_county_flags_nmtc, svi_2020_divisional_county_flags_join_nmtc, join_by("fips_county_st" == "fips_county_st")) 

svi_divisional_county_flags_nmtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join flags with NMTC county project summary data for division
svi_divisional_county_nmtc <- left_join(svi_divisional_nmtc_county_sum, 
                                        svi_divisional_county_flags_nmtc,
                                    join_by("State" == "state", "County" == "county",
                                            "Division" == "division"))

svi_divisional_county_nmtc$post10_nmtc_project_cnt[is.na(svi_divisional_county_nmtc $post10_nmtc_project_cnt)] <- 0

svi_divisional_county_nmtc$county_name <- paste0(svi_divisional_county_nmtc$County, ", ", svi_divisional_county_nmtc$State)

svi_divisional_county_nmtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")

LIHTC Data Wrangling

lihtc_eligible_flag <- lihtc_eligible %>% 
  select("fips", "state", "county", "stcnty", "tract", "metro", "cbsa", "qct_2010") %>% 
  rename("GEOID10" = "fips") %>% 
  mutate(lihtc_eligibility = if_else(qct_2010 == 1, "Yes", "No")) %>% 
  filter(tolower(lihtc_eligibility) == "yes") %>% 
  select(GEOID10, lihtc_eligibility)

lihtc_eligible_flag %>% head() 
lihtc_projects10 <- lihtc_projects %>% 
  filter(yr_alloc < 8000) %>% 
  filter(yr_alloc <= 2010) %>% 
  count(fips2010) %>% 
  rename("pre10_lihtc_project_cnt" = "n")

lihtc_projects10 %>% head() 
lihtc_dollars10 <- lihtc_projects %>% 
  filter(yr_alloc < 8000) %>% 
  filter(yr_alloc <= 2010) %>%
  select(fips2010, allocamt)

lihtc_dollars10$allocamt[is.na(lihtc_dollars10$allocamt)] <- 0

lihtc_dollars10 <- lihtc_dollars10 %>% 
  group_by(fips2010) %>% 
  summarise(pre10_lihtc_project_dollars = sum(allocamt, na.rm = TRUE))

lihtc_dollars10 %>% head() 
# Join to projects data set
lihtc_projects10 <- left_join(lihtc_projects10, lihtc_dollars10, join_by(fips2010 == fips2010))

lihtc_projects10 %>% head()
lihtc_projects20 <- lihtc_projects %>% 
  filter(yr_alloc < 8000) %>% 
  filter(yr_alloc > 2010) %>% 
  filter(yr_alloc < 2021) %>% 
  count(fips2010) %>% 
  rename("post10_lihtc_project_cnt" = "n")

lihtc_projects20 %>% head() 
lihtc_dollars20 <- lihtc_projects %>% 
  filter(yr_alloc < 8000) %>% 
  filter(yr_alloc > 2010) %>% 
  filter(yr_alloc < 2021) %>% 
  select(fips2010, allocamt)

lihtc_dollars20$allocamt[is.na(lihtc_dollars20$allocamt)] <- 0

lihtc_dollars20 <- lihtc_dollars20 %>% 
  group_by(fips2010) %>% 
  summarise(post10_lihtc_project_dollars = sum(allocamt, na.rm = TRUE))

lihtc_dollars20 %>% head() 
lihtc_projects20 <- left_join(lihtc_projects20, lihtc_dollars20, join_by(fips2010 == fips2010))

lihtc_projects20 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join with divisional SVI data
svi_divisional_lihtc10 <- left_join(svi_divisional, lihtc_projects10, join_by("GEOID_2010_trt" == "fips2010"))

svi_divisional_lihtc10 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join with national SVI data
svi_national_lihtc10 <- left_join(svi_national, lihtc_projects10, join_by("GEOID_2010_trt" == "fips2010"))

svi_national_lihtc10 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Add 2020 data
svi_divisional_lihtc20 <- left_join(svi_divisional_lihtc10, lihtc_projects20, join_by("GEOID_2010_trt" == "fips2010"))

svi_divisional_lihtc20 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_national_lihtc20 <- left_join(svi_national_lihtc10, lihtc_projects20, join_by("GEOID_2010_trt" == "fips2010"))

svi_national_lihtc20 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_divisional_lihtc20 <- svi_divisional_lihtc20 %>% 
  filter(is.na(pre10_lihtc_project_cnt)) %>% 
  filter(post10_lihtc_project_cnt >= 1) %>% 
  select(GEOID_2010_trt, pre10_lihtc_project_cnt, pre10_lihtc_project_dollars, post10_lihtc_project_cnt, post10_lihtc_project_dollars)

# View data
svi_divisional_lihtc20 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_national_lihtc20 <- svi_national_lihtc20 %>% 
  filter(is.na(pre10_lihtc_project_cnt)) %>% 
  filter(post10_lihtc_project_cnt >= 1) %>% 
  select(GEOID_2010_trt, pre10_lihtc_project_cnt, pre10_lihtc_project_dollars, post10_lihtc_project_cnt, post10_lihtc_project_dollars)

# View data
svi_national_lihtc20 %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Filter SVI divisional data to remove all tracts that had a project in 2010 or before:
svi_divisional_lihtc <-  svi_divisional %>% 
  filter(! GEOID_2010_trt %in% lihtc_projects10$fips2010)

# Merge SVI divisional data with post 2010 project data, create flag for projects (1 for tracts that have LIHTC project, 0 for those that do not):
svi_divisional_lihtc <- left_join(svi_divisional_lihtc, 
                                  svi_divisional_lihtc20, 
                                  join_by("GEOID_2010_trt" == "GEOID_2010_trt")) %>% 
                        mutate(pre10_lihtc_project_cnt = replace_na(pre10_lihtc_project_cnt, 0),
                               post10_lihtc_project_cnt = replace_na(post10_lihtc_project_cnt, 0),
                               pre10_lihtc_project_dollars = replace_na(pre10_lihtc_project_dollars, 0),
                               post10_lihtc_project_dollars = replace_na(post10_lihtc_project_dollars, 0),
                               lihtc_flag = if_else(post10_lihtc_project_cnt >= 1, 1, 0))

# Finally, we want to filter our dataset to only have tracts that are eligible for the LIHTC program and that have SVI data:
svi_divisional_lihtc <- left_join(svi_divisional_lihtc, lihtc_eligible_flag, 
                                  join_by("GEOID_2010_trt" == "GEOID10")) %>%
                        filter(tolower(lihtc_eligibility) == "yes") %>%
                        filter(!is.na(F_TOTAL_10)) %>% 
                        filter(!is.na(F_TOTAL_20)) 


# View data
svi_divisional_lihtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Filter SVI national data to remove all tracts that had a project in 2010 or before:
svi_national_lihtc <-  svi_national %>% 
  filter(! GEOID_2010_trt %in% lihtc_projects10$fips2010)

# Merge SVI national data with post 2010 project data, create flag for projects (1 for tracts that have LIHTC project, 0 for those that do not):
svi_national_lihtc <- left_join(svi_national_lihtc, 
                                  svi_national_lihtc20, 
                                  join_by("GEOID_2010_trt" == "GEOID_2010_trt")) %>% 
                        mutate(pre10_lihtc_project_cnt = replace_na(pre10_lihtc_project_cnt, 0),
                               post10_lihtc_project_cnt = replace_na(post10_lihtc_project_cnt, 0),
                                pre10_lihtc_project_dollars = replace_na(pre10_lihtc_project_dollars, 0),
                               post10_lihtc_project_dollars = replace_na(post10_lihtc_project_dollars, 0),
                               lihtc_flag = if_else(post10_lihtc_project_cnt >= 1, 1, 0))

# Finally, we want to filter our dataset to only have tracts that are eligible for the LIHTC program and that have SVI data:
svi_national_lihtc <- left_join(svi_national_lihtc, lihtc_eligible_flag, 
                                  join_by("GEOID_2010_trt" == "GEOID10")) %>%
                        filter(tolower(lihtc_eligibility) == "yes") %>%
                        filter(!is.na(F_TOTAL_10)) %>% 
                        filter(!is.na(F_TOTAL_20)) 


# View data
svi_national_lihtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_national_lihtc_county_sum <- summarize_county_lihtc(svi_national_lihtc)

svi_national_lihtc_county_sum %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
svi_divisional_lihtc_county_sum <- summarize_county_lihtc(svi_divisional_lihtc)
svi_divisional_lihtc_county_sum %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Create data frame of LIHTC eligible tracts 2010 nationally
svi_national_lihtc10 <- svi_national_lihtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_10, E_TOTPOP_10) %>% rename("F_TOTAL" = "F_TOTAL_10")

# Count national-level SVI flags for 2010, create unified fips column
svi_2010_national_county_flags_lihtc <- flag_summarize(svi_national_lihtc10, "E_TOTPOP_10") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Add suffix to flag columns 2010
colnames(svi_2010_national_county_flags_lihtc)[11:15] <- paste0(colnames(svi_2010_national_county_flags_lihtc)[11:15], 10)

# Create data frame of LIHTC eligible tracts 2020 nationally
svi_national_lihtc20 <- svi_national_lihtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_20, E_TOTPOP_20) %>% rename("F_TOTAL" = "F_TOTAL_20")

# Count national-level SVI flags for 2020, create unified fips column
svi_2020_national_county_flags_lihtc <- flag_summarize(svi_national_lihtc20, "E_TOTPOP_20") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Identify needed columns for 2020, add suffix
colnames(svi_2020_national_county_flags_lihtc)[11:15] <- paste0(colnames(svi_2020_national_county_flags_lihtc)[11:15], "20")

# Filter to needed columns for 2020 to avoid duplicate column column names
svi_2020_national_county_flags_join_lihtc <- svi_2020_national_county_flags_lihtc %>% ungroup() %>% select("fips_county_st", all_of(colnames(svi_2020_national_county_flags_lihtc)[11:15]))
 
# Join 2010 and 2020 data
svi_national_county_flags_lihtc <- left_join(svi_2010_national_county_flags_lihtc, svi_2020_national_county_flags_join_lihtc, join_by("fips_county_st" == "fips_county_st")) 

svi_national_county_flags_lihtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join data sets
svi_national_county_lihtc <- left_join(svi_national_lihtc_county_sum,
                                      svi_national_county_flags_lihtc,
                                    join_by("State" == "state", "County" == "county",
                                            "Division" == "division"))

svi_national_county_lihtc$post10_lihtc_project_cnt[is.na(svi_national_county_lihtc$post10_lihtc_project_cnt)] <- 0

svi_national_county_lihtc$county_name <- paste0(svi_national_county_lihtc$County, ", ", svi_national_county_lihtc$State)

svi_national_county_lihtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Create data frame of LIHTC eligible tracts 2020 nationally
svi_divisional_lihtc10 <- svi_divisional_lihtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_10, E_TOTPOP_10) %>% rename("F_TOTAL" = "F_TOTAL_10")

# Count divisional-level SVI flags for 2010, create unified fips column
svi_2010_divisional_county_flags_lihtc <- flag_summarize(svi_divisional_lihtc10, "E_TOTPOP_10") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Add suffix to flag columns 2010
colnames(svi_2010_divisional_county_flags_lihtc)[11:15] <- paste0(colnames(svi_2010_divisional_county_flags_lihtc)[11:15], "10")

# Create data frame of LIHTC eligible tracts 2020 nationally
svi_divisional_lihtc20 <- svi_divisional_lihtc %>% select(GEOID_2010_trt, FIPS_st, FIPS_county, 
    state, state_name, county, region_number, region, division_number, 
    division, F_TOTAL_20, E_TOTPOP_20) %>% rename("F_TOTAL" = "F_TOTAL_20")

# Count divisional-level SVI flags for 2020, create unified fips column
svi_2020_divisional_county_flags_lihtc <- flag_summarize(svi_divisional_lihtc20, "E_TOTPOP_20") %>% unite("fips_county_st", FIPS_st:FIPS_county, remove = FALSE, sep="")

# Identify needed columns for 2020
colnames(svi_2020_divisional_county_flags_lihtc)[11:15] <- paste0(colnames(svi_2020_divisional_county_flags_lihtc)[11:15], "20")

# Filter to needed columns for 2020 to avoid duplicate column column names
svi_2020_divisional_county_flags_join_lihtc <- svi_2020_divisional_county_flags_lihtc %>% ungroup() %>% select("fips_county_st", all_of(colnames(svi_2020_divisional_county_flags_lihtc)[11:15]))
 
# Join 2010 and 2020 data
svi_divisional_county_flags_lihtc <- left_join(svi_2010_divisional_county_flags_lihtc, svi_2020_divisional_county_flags_join_lihtc, join_by("fips_county_st" == "fips_county_st")) 

svi_divisional_county_flags_lihtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
# Join data sets
svi_divisional_county_lihtc <- left_join(svi_divisional_lihtc_county_sum, 
                                        svi_divisional_county_flags_lihtc,
                                    join_by("State" == "state", "County" == "county",
                                            "Division" == "division"))

svi_divisional_county_lihtc$post10_lihtc_project_cnt[is.na(svi_divisional_county_lihtc $post10_lihtc_project_cnt)] <- 0

svi_divisional_county_lihtc$county_name <- paste0(svi_divisional_county_lihtc$County, ", ", svi_divisional_county_lihtc$State)

svi_divisional_county_lihtc %>% head() %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")

Exploratory Data Analysis

NMTC in Pacific Division

svi_divisional_county_nmtc_projects <- svi_divisional_county_nmtc %>% filter(post10_nmtc_project_cnt > 0)

Summary Statistics

summary(svi_divisional_county_nmtc_projects$flag_count10)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   26.25   56.00  301.68  199.75 9210.00
summary(svi_divisional_county_nmtc_projects$post10_nmtc_project_dollars)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##    669550   9561257  18612500  41859846  35824250 987407086

Based on the summary statistics we can see a wide spread in SVI flags for the Pacific Division ranging from one to 9210 flags. However, based on the median of these flags, more counties are closer to the lower end of that range, with 75% of counties under 200 flags. Los Angeles County has 9210 flags, but is not considered an outlier because it receives a proportional amount of project dollars.

Tax credit spending is also widely varied in this division, with spending anywhere between $669K to $987M.

# Scatterplot
# y is our independent variable (NMTC Project Dollars),  
# x is our dependent variable (SVI flag count)
ggplot2::ggplot(svi_divisional_county_nmtc_projects,
                aes(x=flag_count10,
                    y=post10_nmtc_project_dollars)) +
        geom_point() +
        geom_smooth(method="lm") +
        scale_y_continuous(labels = scales::dollar_format()) 
## `geom_smooth()` using formula = 'y ~ x'

# Pearson's r calculation
cor(svi_divisional_county_nmtc_projects$flag_count10, svi_divisional_county_nmtc_projects$post10_nmtc_project_dollars, method = "pearson")
## [1] 0.9603179

According to the Pearson’s coefficient, there is a very strong relationship between SVI flags and tax credit spending, with R = 0.96. This means that for the higher amount of flags a county has, they are more likely to receive tax credits. Similar to the national data, most counties are clustered in the bottom quadrant with less than 1250 flags and receiving less than $125,000,000. There is one influential outlier, as stated previously, receiving around $989 million.

boxplot(svi_divisional_county_nmtc_projects$flag_count10)

boxplot.stats(svi_divisional_county_nmtc_projects$flag_count10)$out %>% sort(decreasing = TRUE)
## [1] 9210 1451 1305  978  931  831  815  669  596
svi_divisional_county_nmtc_projects %>% 
  select(county_name, flag_count10, post10_nmtc_dollars_formatted) %>% 
  arrange(desc(flag_count10)) %>% 
  head(7) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
county_name flag_count10 post10_nmtc_dollars_formatted
Los Angeles County, CA 9210 \$987,407,086
Riverside County, CA 1451 \$40,871,824
San Diego County, CA 1305 \$221,738,411
Orange County, CA 978 \$12,463,033
Fresno County, CA 931 \$115,258,480
Sacramento County, CA 831 \$3,630,000
Alameda County, CA 815 \$119,049,224

Los Angeles County has the highest number of flags and the highest tax credit, which makes sense given our correlation and then there is a steep drop off to the county with the second highest amount of SVI flags at 1451. The rest of the outliers are much closer together, but still span a range of 600 SVI flags.

svi_divisional_nmtc_cluster <- svi_divisional_county_nmtc_projects %>% 
                            select(county_name, post10_nmtc_project_dollars, 
                                   flag_count10) %>% 
                            remove_rownames %>% 
                            column_to_rownames(var="county_name")

# Remove nulls, if in dataset
svi_divisional_nmtc_cluster <- na.omit(svi_divisional_nmtc_cluster)


# Scale numeric variables
svi_divisional_nmtc_cluster <- scale(svi_divisional_nmtc_cluster)


svi_divisional_nmtc_cluster %>% head(5)
##                               post10_nmtc_project_dollars flag_count10
## Aleutians East Borough, AK                     -0.2292355   -0.2754180
## Anchorage Municipality, AK                     -0.2816092   -0.2311900
## Wade Hampton Census Area, AK                   -0.1795408   -0.2744770
## Yukon-Koyukuk Census Area, AK                  -0.3005472   -0.2594206
## Alameda County, CA                              0.6780208    0.4830462

K-Means Clustering

set.seed(123)
k2_nmtc_div <- kmeans(svi_divisional_nmtc_cluster, centers = 2, nstart = 25)
set.seed(123)
k3_nmtc_div <- kmeans(svi_divisional_nmtc_cluster, centers = 3, nstart = 25)
set.seed(123)
k4_nmtc_div <- kmeans(svi_divisional_nmtc_cluster, centers = 4, nstart = 25)
set.seed(123)
k5_nmtc_div <- kmeans(svi_divisional_nmtc_cluster, centers = 5, nstart = 25)
# plots to compare
p_k2_nmtc_div <- factoextra::fviz_cluster(k2_nmtc_div, geom = "point", data = svi_divisional_nmtc_cluster) + ggtitle("k = 2")

p_k3_nmtc_div <- factoextra::fviz_cluster(k3_nmtc_div, geom = "point", data = svi_divisional_nmtc_cluster) + ggtitle("k = 3")

p_k4_nmtc_div <- factoextra::fviz_cluster(k4_nmtc_div, geom = "point",  data = svi_divisional_nmtc_cluster) + ggtitle("k = 4")

p_k5_nmtc_div <- factoextra::fviz_cluster(k5_nmtc_div, geom = "point",  data = svi_divisional_nmtc_cluster) + ggtitle("k = 5")

grid.arrange(p_k2_nmtc_div, p_k3_nmtc_div, p_k4_nmtc_div, p_k5_nmtc_div, nrow = 2)

elbow_plot(svi_divisional_nmtc_cluster)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15

Spacing between clusters substantially reduces after two, so we will proceed with two clusters.

p_k2_nmtc_div <- factoextra::fviz_cluster(k2_nmtc_div, geom = "point", data = svi_divisional_nmtc_cluster) + ggtitle("k = 2")

p_k2_nmtc_div

Since we have chosen two clusters, the highest data point is in it’s own grouping which will help to remove it’s influence on the remainder of the data set without completely discounting it.

svi_divisional_nmtc_cluster_label <- as.data.frame(svi_divisional_nmtc_cluster) %>%
                                  rownames_to_column(var = "county_name") %>%
                                  as_tibble() %>%
                                  mutate(cluster = k2_nmtc_div$cluster) %>%
                                  select(county_name, cluster)

svi_divisional_county_nmtc_projects2 <- left_join(svi_divisional_county_nmtc_projects, svi_divisional_nmtc_cluster_label, join_by(county_name == county_name))

# View county counts in each cluster
table(svi_divisional_county_nmtc_projects2$cluster)
## 
##  1  2 
##  1 77
# Cluster 1 Scatterplot
# y is our independent variable (NMTC Project Dollars),  
# x is our dependent variable (SVI flag count)

svi_divisional_county_nmtc_projects2 %>% 
  filter(cluster == 1) %>%
  ggplot2::ggplot(aes(x=flag_count10,
                    y=post10_nmtc_project_dollars)) +
        geom_point() +
        geom_smooth(method="lm") +
        scale_y_continuous(labels = scales::dollar_format()) 
## `geom_smooth()` using formula = 'y ~ x'

svi_divisional_county_nmtc_projects2 %>% 
  filter(cluster == 1) %>%
  select(flag_count10,post10_nmtc_project_dollars) %>%
  cor(method = "pearson")
##                             flag_count10 post10_nmtc_project_dollars
## flag_count10                          NA                          NA
## post10_nmtc_project_dollars           NA                          NA
svi_divisional_county_nmtc_projects2 %>% 
  filter(cluster == 1) %>%
  select(county_name, flag_count10, post10_nmtc_dollars_formatted) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
county_name flag_count10 post10_nmtc_dollars_formatted
Los Angeles County, CA 9210 \$987,407,086

Since there is only one county in this cluster, we can not directly calculate any correlation between social vulnerability and tax credits, but we can infer that as social vulnerability increases, so does the amount of tax credits for that county. We can infer this because Los Angeles County has both the highest tax credits and number if SVI flags.

# Cluster 2 Scatterplot
# y is our independent variable (NMTC Project Dollars),  
# x is our dependent variable (SVI flag count)

svi_divisional_county_nmtc_projects2 %>% 
  filter(cluster == 2) %>%
  ggplot2::ggplot(aes(x=flag_count10,
                    y=post10_nmtc_project_dollars)) +
        geom_point() +
        geom_smooth(method="lm") +
        scale_y_continuous(labels = scales::dollar_format()) 
## `geom_smooth()` using formula = 'y ~ x'

svi_divisional_county_nmtc_projects2 %>% 
  filter(cluster == 2) %>%
  select(flag_count10,post10_nmtc_project_dollars) %>%
  cor(method = "pearson")
##                             flag_count10 post10_nmtc_project_dollars
## flag_count10                   1.0000000                   0.5307725
## post10_nmtc_project_dollars    0.5307725                   1.0000000
svi_divisional_county_nmtc_projects2 %>% 
  filter(cluster == 2) %>%
  select(county_name, flag_count10, post10_nmtc_dollars_formatted) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
county_name flag_count10 post10_nmtc_dollars_formatted
Aleutians East Borough, AK 9 \$15,762,500
Anchorage Municipality, AK 56 \$9,800,000
Wade Hampton Census Area, AK 10 \$21,420,000
Yukon-Koyukuk Census Area, AK 26 \$7,644,000
Alameda County, CA 815 \$119,049,224
Butte County, CA 171 \$22,355,000
Contra Costa County, CA 346 \$24,726,000
Del Norte County, CA 25 \$39,615,000
Fresno County, CA 931 \$115,258,480
Humboldt County, CA 75 \$22,330,000
Kern County, CA 669 \$31,290,000
Madera County, CA 117 \$10,864,000
Merced County, CA 283 \$18,982,900
Napa County, CA 36 \$29,651,000
Orange County, CA 978 \$12,463,033
Riverside County, CA 1451 \$40,871,824
Sacramento County, CA 831 \$3,630,000
San Diego County, CA 1305 \$221,738,411
San Francisco County, CA 316 \$68,150,600
San Luis Obispo County, CA 56 \$47,941,400
San Mateo County, CA 161 \$47,864,844
Santa Barbara County, CA 231 \$9,568,778
Santa Clara County, CA 596 \$41,829,813
Santa Cruz County, CA 106 \$8,342,000
Shasta County, CA 135 \$48,005,000
Siskiyou County, CA 56 \$18,625,000
Solano County, CA 163 \$8,500,000
Stanislaus County, CA 394 \$8,614,600
Tehama County, CA 40 \$13,000,000
Ventura County, CA 396 \$20,200,000
Yolo County, CA 83 \$6,790,000
Hawaii County, HI 56 \$99,555,000
Honolulu County, HI 343 \$35,830,900
Maui County, HI 38 \$22,038,000
Baker County, OR 16 \$8,148,000
Clackamas County, OR 54 \$980,000
Clatsop County, OR 9 \$11,640,000
Coos County, OR 38 \$35,804,300
Crook County, OR 12 \$5,820,000
Curry County, OR 9 \$12,610,000
Douglas County, OR 45 \$66,520,000
Hood River County, OR 1 \$16,100,000
Jackson County, OR 102 \$9,558,750
Josephine County, OR 54 \$20,480,000
Klamath County, OR 55 \$6,547,500
Lake County, OR 9 \$7,275,000
Lane County, OR 175 \$28,810,000
Lincoln County, OR 31 \$2,988,434
Linn County, OR 41 \$17,640,000
Malheur County, OR 24 \$19,730,000
Marion County, OR 110 \$4,800,000
Multnomah County, OR 262 \$52,382,352
Polk County, OR 17 \$12,480,000
Umatilla County, OR 26 \$29,975,000
Wallowa County, OR 4 \$3,750,000
Wasco County, OR 18 \$3,884,000
Washington County, OR 104 \$9,081,000
Adams County, WA 19 \$30,510,000
Benton County, WA 58 \$15,480,000
Clallam County, WA 31 \$7,620,320
Clark County, WA 133 \$64,280,000
Columbia County, WA 4 \$19,500,000
Cowlitz County, WA 60 \$669,550
Ferry County, WA 14 \$16,005,000
Grant County, WA 57 \$15,520,000
Grays Harbor County, WA 47 \$83,180,621
King County, WA 358 \$111,041,500
Mason County, WA 27 \$12,330,000
Okanogan County, WA 37 \$33,838,866
Pend Oreille County, WA 18 \$19,310,000
Pierce County, WA 290 \$76,005,801
Skagit County, WA 45 \$9,994,000
Snohomish County, WA 146 \$47,437,840
Spokane County, WA 199 \$15,328,238
Whatcom County, WA 38 \$5,940,000
Whitman County, WA 20 \$15,757,500
Yakima County, WA 200 \$18,600,000

By removing Los Angeles County, we can see a weaker correlation for counties with SVI flags between 0-1500 receiving 0-225,000,000 NMTC dollars. For the majority of counties within the Pacific Division thers is a moderately positive correlation between more vulnerable counties receiving more NMTC dollars than less vulnerable counties.

Bivariate Mapping

divisional_county_sf <- svi_county_map2010 %>% select(COUNTYFP, STATEFP, geometry)

divisional_county_sf %>% head(5)
## Simple feature collection with 5 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2327771 ymin: -195140.7 xmax: -1953556 ymax: 782440.1
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
##   COUNTYFP STATEFP                       geometry
## 1      035      06 MULTIPOLYGON (((-1987497 61...
## 2      049      06 MULTIPOLYGON (((-1992512 76...
## 3      075      06 MULTIPOLYGON (((-2327608 35...
## 4      083      06 MULTIPOLYGON (((-2157081 -1...
## 5      091      06 MULTIPOLYGON (((-2025220 47...
# Join our NMTC projects data with our shapefile geocoordinates
svi_divisional_county_nmtc_sf <- left_join(svi_divisional_county_nmtc_projects, divisional_county_sf, join_by("FIPS_st" == "STATEFP", "FIPS_county" == "COUNTYFP"))

svi_divisional_county_nmtc_sf %>% head(5) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
State County Division post10_nmtc_project_cnt tract_cnt post10_nmtc_project_dollars post10_nmtc_dollars_formatted fips_county_st FIPS_st FIPS_county state_name region_number region division_number flag_count10 pop10 flag_by_pop10 flag_count_quantile10 flag_pop_quantile10 flag_count20 pop20 flag_by_pop20 flag_count_quantile20 flag_pop_quantile20 county_name geometry
AK Aleutians East Borough Pacific Division 1 1 15762500 \$15,762,500 02013 02 013 Alaska 4 West Region 9 9 3703 0.0024305 0.2 1.0 6 3389 0.0017704 0.2 1.0 Aleutians East Borough, AK MULTIPOLYGON (((-2385249 -1…
AK Anchorage Municipality Pacific Division 1 13 9800000 \$9,800,000 02020 02 020 Alaska 4 West Region 9 56 64432 0.0008691 0.8 0.2 73 69679 0.0010477 0.8 0.4 Anchorage Municipality, AK MULTIPOLYGON (((-1927463 -1…
AK Wade Hampton Census Area Pacific Division 1 1 21420000 \$21,420,000 02270 02 270 Alaska 4 West Region 9 10 7398 0.0013517 0.4 0.8 11 8298 0.0013256 0.4 0.6 Wade Hampton Census Area, AK MULTIPOLYGON (((-2310112 -1…
AK Yukon-Koyukuk Census Area Pacific Division 1 3 7644000 \$7,644,000 02290 02 290 Alaska 4 West Region 9 26 4027 0.0064564 0.6 1.0 28 3979 0.0070369 0.6 1.0 Yukon-Koyukuk Census Area, AK MULTIPOLYGON (((-1736112 -9…
CA Alameda County Pacific Division 10 134 119049224 \$119,049,224 06001 06 001 California 4 West Region 9 815 563385 0.0014466 1.0 0.8 690 615018 0.0011219 1.0 0.6 Alameda County, CA MULTIPOLYGON (((-2266160 34…
# Create classes for bivariate mapping 
svi_divisional_county_nmtc_sf <- bi_class(svi_divisional_county_nmtc_sf, x = flag_count10, y = post10_nmtc_project_dollars, style = "quantile", dim = 3)

# View data
svi_divisional_county_nmtc_sf %>% head(5) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
State County Division post10_nmtc_project_cnt tract_cnt post10_nmtc_project_dollars post10_nmtc_dollars_formatted fips_county_st FIPS_st FIPS_county state_name region_number region division_number flag_count10 pop10 flag_by_pop10 flag_count_quantile10 flag_pop_quantile10 flag_count20 pop20 flag_by_pop20 flag_count_quantile20 flag_pop_quantile20 county_name geometry bi_class
AK Aleutians East Borough Pacific Division 1 1 15762500 \$15,762,500 02013 02 013 Alaska 4 West Region 9 9 3703 0.0024305 0.2 1.0 6 3389 0.0017704 0.2 1.0 Aleutians East Borough, AK MULTIPOLYGON (((-2385249 -1… 1-2
AK Anchorage Municipality Pacific Division 1 13 9800000 \$9,800,000 02020 02 020 Alaska 4 West Region 9 56 64432 0.0008691 0.8 0.2 73 69679 0.0010477 0.8 0.4 Anchorage Municipality, AK MULTIPOLYGON (((-1927463 -1… 2-1
AK Wade Hampton Census Area Pacific Division 1 1 21420000 \$21,420,000 02270 02 270 Alaska 4 West Region 9 10 7398 0.0013517 0.4 0.8 11 8298 0.0013256 0.4 0.6 Wade Hampton Census Area, AK MULTIPOLYGON (((-2310112 -1… 1-2
AK Yukon-Koyukuk Census Area Pacific Division 1 3 7644000 \$7,644,000 02290 02 290 Alaska 4 West Region 9 26 4027 0.0064564 0.6 1.0 28 3979 0.0070369 0.6 1.0 Yukon-Koyukuk Census Area, AK MULTIPOLYGON (((-1736112 -9… 1-1
CA Alameda County Pacific Division 10 134 119049224 \$119,049,224 06001 06 001 California 4 West Region 9 815 563385 0.0014466 1.0 0.8 690 615018 0.0011219 1.0 0.6 Alameda County, CA MULTIPOLYGON (((-2266160 34… 3-3
# Create map with ggplot
svi_divisional_county_nmtc_map <- ggplot() +
  # Map county shapefile, fill with bi_class categories
  geom_sf(data = svi_divisional_county_nmtc_sf, mapping = aes(geometry=geometry, fill = bi_class), color = "white", size = 0.1, show.legend = FALSE) +
  # Set to biscale palette
  bi_scale_fill(pal = "GrPink", dim = 3) +
  # Add state shapefiles for outline
  geom_sf(data=divisional_st_sf, color="black", fill=NA, linewidth=.5, aes(geometry=geometry)) +
  labs(
    title = paste0("Correlation of 2010 ", census_division, " SVI Flag Count \n and 2011 - 2020 NMTC Tax Dollars"),
  ) +
  # Set them to biscale
  bi_theme(base_size = 10)

# Create biscale legend
svi_divisional_county_nmtc_legend <- bi_legend(pal = "GrPink",
                    dim = 3,
                    xlab = "SVI Flag Count",
                    ylab = "NMTC Dollars",
                    size = 8)

# Combine map with legend using cowplot
svi_divisional_county_nmtc_bivarmap <- ggdraw() +
  draw_plot(svi_divisional_county_nmtc_map) +
  # Set legend location
  draw_plot(svi_divisional_county_nmtc_legend, x= -.02,  y = -.05,
 width=.20)


# View map
svi_divisional_county_nmtc_bivarmap

Here we can see that counties with high social vulnerability and large amounts of NMTC dollars are located around metropolitan areas, specifically Los Angeles, San Francisco, and Seattle.

svi_divisional_county_nmtc_sf %>% filter(State %in% c("CA", "WA")) %>% select(State, County, flag_count10, post10_nmtc_dollars_formatted) %>% arrange(desc(flag_count10)) %>% head(6) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
State County flag_count10 post10_nmtc_dollars_formatted
CA Los Angeles County 9210 \$987,407,086
CA Riverside County 1451 \$40,871,824
CA San Diego County 1305 \$221,738,411
CA Orange County 978 \$12,463,033
CA Fresno County 931 \$115,258,480
CA Sacramento County 831 \$3,630,000

This chart shows that the majority of high SVI flags for the Pacific Division are coming from Central and Southern California, particulary Los Angeles, Riverside, and San Diego Counties.

svi_divisional_county_nmtc_sf %>% filter(State %in% c("AK", "HI", "OR")) %>%
  arrange(desc(post10_nmtc_project_dollars), flag_count10) %>% select(State, County, flag_count10, post10_nmtc_dollars_formatted) %>% head(10) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
State County flag_count10 post10_nmtc_dollars_formatted
HI Hawaii County 56 \$99,555,000
OR Douglas County 45 \$66,520,000
OR Multnomah County 262 \$52,382,352
HI Honolulu County 343 \$35,830,900
OR Coos County 38 \$35,804,300
OR Umatilla County 26 \$29,975,000
OR Lane County 175 \$28,810,000
HI Maui County 38 \$22,038,000
AK Wade Hampton Census Area 10 \$21,420,000
OR Josephine County 54 \$20,480,000

In contrast, we can see that Alaska, Hawaii, and Oregon all have several counties with low SVI flags that receive high amounts of NMTC dollars.

LIHTC in Pacific Division

NOw we can take a look at LIHTC data:

svi_divisional_county_lihtc_projects <- svi_divisional_county_lihtc %>% filter(post10_lihtc_project_cnt > 0)
svi_divisional_county_lihtc_projects <- svi_divisional_county_lihtc_projects %>% filter(post10_lihtc_project_dollars > 0)
summary(svi_divisional_county_lihtc_projects$flag_count10)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   14.75   25.50  171.14  149.75 2394.00

Again we can see that there may be a few influential outliers in our data with the highest amount of flags being 2394 and the majority of flags being under 150.

summary(svi_divisional_county_lihtc_projects$post10_lihtc_project_dollars)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   250101   745546  2168330  5287061  4488865 50547731

This is also true of LIHTC dollars, with around a 45,000,000 difference between the 75th percentile and the highest awarded county.

We can view this visually with the graph below:

# Scatterplot
# y is our independent variable (LIHTC Project Dollars),  
# x is our dependent variable (SVI flag count)
ggplot2::ggplot(svi_divisional_county_lihtc_projects,
                aes(x=flag_count10,
                    y=post10_lihtc_project_dollars)) +
        geom_point() +
        geom_smooth(method="lm") +
        scale_y_continuous(labels = scales::dollar_format()) 
## `geom_smooth()` using formula = 'y ~ x'

# Pearson's r calculation
cor(svi_divisional_county_lihtc_projects$flag_count10, svi_divisional_county_lihtc_projects$post10_lihtc_project_dollars, method = "pearson")
## [1] 0.9418682

Considering all counties in the Pacific Division, there is a very strong positive correlation between the number of SVI flags in 2010 and the amount of LIHTC dollars in 2011-2020, suggesting that counties that are more vulnerable receive more tax credits with the LIHTC program.

However, the plot shows an influential data point that may be skewing the correlation. We can view it as a box plot:

boxplot(svi_divisional_county_lihtc_projects$flag_count10)

boxplot.stats(svi_divisional_county_lihtc_projects$flag_count10)$out %>% sort(decreasing = TRUE)
## [1] 2394  376
svi_divisional_county_lihtc_projects %>% filter(flag_count10 == 2394) %>% select(county_name, flag_count10, post10_lihtc_dollars_formatted) %>% head() 
##              county_name flag_count10 post10_lihtc_dollars_formatted
## 1 Los Angeles County, CA         2394                    $50,547,731

We can see this is the same influential point from our NMTC analysis, Los Angeles County, CA.

K-Means Clustering

svi_divisional_lihtc_cluster <- svi_divisional_county_lihtc_projects %>% 
                            select(county_name, post10_lihtc_project_dollars, 
                                   flag_count10) %>% 
                            remove_rownames %>% 
                            column_to_rownames(var="county_name")

# Remove nulls, if in dataset
svi_divisional_lihtc_cluster <- na.omit(svi_divisional_lihtc_cluster)


# Scale numeric variables
svi_divisional_lihtc_cluster <- scale(svi_divisional_lihtc_cluster)


svi_divisional_lihtc_cluster %>% head(5)
##                     post10_lihtc_project_dollars flag_count10
## Alameda County, CA                    -0.3720126   0.03317421
## Butte County, CA                      -0.4766870  -0.34418241
## Fresno County, CA                     -0.2922395  -0.17225070
## Humboldt County, CA                   -0.4092212  -0.35534681
## Kern County, CA                       -0.2010567  -0.11866159
set.seed(123)
k2_lihtc_div <- kmeans(svi_divisional_lihtc_cluster, centers = 2, nstart = 25)
set.seed(123)
k3_lihtc_div <- kmeans(svi_divisional_lihtc_cluster, centers = 3, nstart = 25)
set.seed(123)
k4_lihtc_div <- kmeans(svi_divisional_lihtc_cluster, centers = 4, nstart = 25)
set.seed(123)
k5_lihtc_div <- kmeans(svi_divisional_lihtc_cluster, centers = 5, nstart = 25)
# plots to compare
p_k2_lihtc_div <- factoextra::fviz_cluster(k2_lihtc_div, geom = "point", data = svi_divisional_lihtc_cluster) + ggtitle("k = 2")

p_k3_lihtc_div <- factoextra::fviz_cluster(k3_lihtc_div, geom = "point", data = svi_divisional_lihtc_cluster) + ggtitle("k = 3")

p_k4_lihtc_div <- factoextra::fviz_cluster(k4_lihtc_div, geom = "point",  data = svi_divisional_lihtc_cluster) + ggtitle("k = 4")

p_k5_lihtc_div <- factoextra::fviz_cluster(k5_lihtc_div, geom = "point",  data = svi_divisional_lihtc_cluster) + ggtitle("k = 5")

grid.arrange(p_k2_lihtc_div, p_k3_lihtc_div, p_k4_lihtc_div, p_k5_lihtc_div, nrow = 2)

elbow_plot(svi_divisional_lihtc_cluster)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15

Again, we will choose to use two clusters as we did for the NMTC.

p_k2_lihtc_div <- factoextra::fviz_cluster(k2_lihtc_div, geom = "point", data = svi_divisional_lihtc_cluster) + ggtitle("k = 2")

p_k2_lihtc_div

svi_divisional_lihtc_cluster_label <- as.data.frame(svi_divisional_lihtc_cluster) %>%
                                  rownames_to_column(var = "county_name") %>%
                                  as_tibble() %>%
                                  mutate(cluster = k2_lihtc_div$cluster) %>%
                                  select(county_name, cluster)

svi_divisional_county_lihtc_projects2 <- left_join(svi_divisional_county_lihtc_projects, svi_divisional_lihtc_cluster_label, join_by(county_name == county_name))

# View county counts in each cluster
table(svi_divisional_county_lihtc_projects2$cluster)
## 
##  1  2 
##  1 27

Now let’s take a closer look at our clusters.

# Cluster 1 Scatterplot
# y is our independent variable (LIHTC Project Dollars),  
# x is our dependent variable (SVI flag count)

svi_divisional_county_lihtc_projects2 %>% 
  filter(cluster == 1) %>%
  ggplot2::ggplot(aes(x=flag_count10,
                    y=post10_lihtc_project_dollars)) +
        geom_point() +
        geom_smooth(method="lm") +
        scale_y_continuous(labels = scales::dollar_format()) 
## `geom_smooth()` using formula = 'y ~ x'

svi_divisional_county_lihtc_projects2 %>% 
  filter(cluster == 1) %>%
  select(flag_count10,post10_lihtc_project_dollars) %>%
  cor(method = "pearson")
##                              flag_count10 post10_lihtc_project_dollars
## flag_count10                           NA                           NA
## post10_lihtc_project_dollars           NA                           NA

Similar to the NMTC data, we cannot calculate a correlation with only one point.

# Cluster 2 Scatterplot
# y is our independent variable (LIHTC Project Dollars),  
# x is our dependent variable (SVI flag count)

svi_divisional_county_lihtc_projects2 %>% 
  filter(cluster == 2) %>%
  ggplot2::ggplot(aes(x=flag_count10,
                    y=post10_lihtc_project_dollars)) +
        geom_point() +
        geom_smooth(method="lm") +
        scale_y_continuous(labels = scales::dollar_format()) 
## `geom_smooth()` using formula = 'y ~ x'

svi_divisional_county_lihtc_projects2 %>% 
  filter(cluster == 2) %>%
  select(flag_count10,post10_lihtc_project_dollars) %>%
  cor(method = "pearson")
##                              flag_count10 post10_lihtc_project_dollars
## flag_count10                    1.0000000                    0.7026956
## post10_lihtc_project_dollars    0.7026956                    1.0000000

Now we can see that there is still a strong positive correlation between the number of SVI flags in 2010 and LIHTC dollars received in 2011-2020, though not as strong a correlation when looking at all counties in the Pacific Division.

Bivariate Maps

# Join our LIHTC projects data with our shapefile geocoordinates
svi_divisional_county_lihtc_sf <- left_join(svi_divisional_county_lihtc_projects, divisional_county_sf, join_by("FIPS_st" == "STATEFP", "FIPS_county" == "COUNTYFP"))

svi_divisional_county_lihtc_sf %>% head(5) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
State County Division post10_lihtc_project_cnt tract_cnt post10_lihtc_project_dollars post10_lihtc_dollars_formatted fips_county_st FIPS_st FIPS_county state_name region_number region division_number flag_count10 pop10 flag_by_pop10 flag_count_quantile10 flag_pop_quantile10 flag_count20 pop20 flag_by_pop20 flag_count_quantile20 flag_pop_quantile20 county_name geometry
CA Alameda County Pacific Division 1 23 1590984 \$1,590,984 06001 06 001 California 4 West Region 9 186 92323 0.0020147 1.0 0.6 170 101788 0.0016701 1.0 0.6 Alameda County, CA MULTIPOLYGON (((-2266160 34…
CA Butte County Pacific Division 1 3 551007 \$551,007 06007 06 007 California 4 West Region 9 17 12025 0.0014137 0.6 0.2 19 13034 0.0014577 0.6 0.4 Butte County, CA MULTIPOLYGON (((-2174433 56…
CA Fresno County Pacific Division 4 8 2383558 \$2,383,558 06019 06 019 California 4 West Region 9 94 36454 0.0025786 1.0 0.8 92 34872 0.0026382 1.0 1.0 Fresno County, CA MULTIPOLYGON (((-2051818 14…
CA Humboldt County Pacific Division 1 2 1221303 \$1,221,303 06023 06 023 California 4 West Region 9 12 8644 0.0013882 0.4 0.2 15 8698 0.0017245 0.6 0.6 Humboldt County, CA MULTIPOLYGON (((-2312278 74…
CA Kern County Pacific Division 8 11 3289492 \$3,289,492 06029 06 029 California 4 West Region 9 118 54631 0.0021599 1.0 0.6 116 54102 0.0021441 1.0 0.8 Kern County, CA MULTIPOLYGON (((-2050419 59…
# Create classes for bivariate mapping 
svi_divisional_county_lihtc_sf <- bi_class(svi_divisional_county_lihtc_sf, x = flag_count10, y = post10_lihtc_project_dollars, style = "quantile", dim = 3)

# View data
svi_divisional_county_lihtc_sf %>% head(5) %>% kbl() %>% kable_styling() %>% scroll_box(width = "100%")
State County Division post10_lihtc_project_cnt tract_cnt post10_lihtc_project_dollars post10_lihtc_dollars_formatted fips_county_st FIPS_st FIPS_county state_name region_number region division_number flag_count10 pop10 flag_by_pop10 flag_count_quantile10 flag_pop_quantile10 flag_count20 pop20 flag_by_pop20 flag_count_quantile20 flag_pop_quantile20 county_name geometry bi_class
CA Alameda County Pacific Division 1 23 1590984 \$1,590,984 06001 06 001 California 4 West Region 9 186 92323 0.0020147 1.0 0.6 170 101788 0.0016701 1.0 0.6 Alameda County, CA MULTIPOLYGON (((-2266160 34… 3-2
CA Butte County Pacific Division 1 3 551007 \$551,007 06007 06 007 California 4 West Region 9 17 12025 0.0014137 0.6 0.2 19 13034 0.0014577 0.6 0.4 Butte County, CA MULTIPOLYGON (((-2174433 56… 2-1
CA Fresno County Pacific Division 4 8 2383558 \$2,383,558 06019 06 019 California 4 West Region 9 94 36454 0.0025786 1.0 0.8 92 34872 0.0026382 1.0 1.0 Fresno County, CA MULTIPOLYGON (((-2051818 14… 2-2
CA Humboldt County Pacific Division 1 2 1221303 \$1,221,303 06023 06 023 California 4 West Region 9 12 8644 0.0013882 0.4 0.2 15 8698 0.0017245 0.6 0.6 Humboldt County, CA MULTIPOLYGON (((-2312278 74… 1-1
CA Kern County Pacific Division 8 11 3289492 \$3,289,492 06029 06 029 California 4 West Region 9 118 54631 0.0021599 1.0 0.6 116 54102 0.0021441 1.0 0.8 Kern County, CA MULTIPOLYGON (((-2050419 59… 2-2
# Create map with ggplot
svi_divisional_county_lihtc_map <- ggplot() +
  # Map county shapefile, fill with bi_class categories
  geom_sf(data = svi_divisional_county_lihtc_sf, mapping = aes(geometry=geometry, fill = bi_class), color = "white", size = 0.1, show.legend = FALSE) +
  # Set to biscale palette
  bi_scale_fill(pal = "GrPink", dim = 3) +
  # Add state shapefiles for outline
  geom_sf(data=divisional_st_sf, color="black", fill=NA, linewidth=.5, aes(geometry=geometry)) +
  labs(
    title = paste0("Correlation of 2010 ", census_division, " SVI Flag Count \n and 2011 - 2020 LIHTC Tax Dollars")
  ) +
  # Set them to biscale
  bi_theme(base_size = 10)

# Create biscale legend
svi_divisional_county_lihtc_legend <- bi_legend(pal = "GrPink",
                    dim = 3,
                    xlab = "SVI Flag Count",
                    ylab = "LIHTC Dollars",
                    size = 8)

# Combine map with legend using cowplot
svi_divisional_county_lihtc_bivarmap <- ggdraw() +
  draw_plot(svi_divisional_county_lihtc_map) +
  # Set legend location
  draw_plot(svi_divisional_county_lihtc_legend, x= -.02,  y = -.05,
 width=.20)


# View map
svi_divisional_county_lihtc_bivarmap

With our map, we can a higher amount of SVI flags and LIHTC dollars in different parts of California and Washington.

svi_divisional_county_lihtc_sf %>% filter(State == "CA") %>% select(State, County, flag_count10, post10_lihtc_dollars_formatted) %>% arrange(desc(flag_count10)) %>% head(6)
##   State                County flag_count10 post10_lihtc_dollars_formatted
## 1    CA    Los Angeles County         2394                    $50,547,731
## 2    CA      San Diego County          376                    $20,961,962
## 3    CA         Orange County          310                    $10,245,926
## 4    CA      Riverside County          256                     $6,335,483
## 5    CA San Bernardino County          249                     $5,757,810
## 6    CA        Alameda County          186                     $1,590,984
svi_divisional_county_lihtc_sf %>% filter(State == "WA") %>% select(State, County, flag_count10, post10_lihtc_dollars_formatted) %>% arrange(desc(flag_count10)) %>% head(6)
##   State              County flag_count10 post10_lihtc_dollars_formatted
## 1    WA      Spokane County           24                       $591,050
## 2    WA     Franklin County           20                     $1,281,731
## 3    WA         King County           14                     $3,259,389
## 4    WA       Kitsap County            9                     $3,905,012
## 5    WA Grays Harbor County            8                     $3,332,521
## 6    WA        Lewis County            3                       $250,101
saveRDS(svi_divisional_lihtc, file = here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_svi_divisional_lihtc.rds")))

saveRDS(svi_national_lihtc, file = here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_svi_national_lihtc.rds")))

saveRDS(svi_divisional_nmtc, file = here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_svi_divisional_nmtc.rds")))

saveRDS(svi_national_nmtc, file = here::here(paste0("data/wrangling/", str_replace_all(census_division, " ", "_"), "_svi_national_nmtc.rds")))