# Chunk defaults for the rendered document: echo code, and set the
# `message` and `warning` chunk options to FALSE.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

library(tidyverse)
library(rlang)
library(viridis)

# Default plot theme: minimal, with the legend on the right.
theme_set(theme_minimal() + theme(legend.position = "right"))

# Ask ggplot2 to use viridis for continuous colour and fill scales.
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

# Bind the discrete colour/fill scale names to their viridis counterparts.
scale_colour_discrete <- scale_colour_viridis_d
scale_fill_discrete <- scale_fill_viridis_d
# Load and wrangle data
# Read the raw 2015 tree file, treating "", "NA" and "Unknown" as missing.
# Then clean the column names, title-case the common species name, and move
# the health levels "Good", "Fair", "Poor" to the front in that order.
trees_2015 <-
  read_csv(
    "large_tree_data/2015_tree_raw.csv",
    na = c("", "NA", "Unknown")
  ) %>%
  janitor::clean_names() %>%
  mutate(
    spc_common = str_to_title(spc_common),
    health = fct_relevel(health, c("Good", "Fair", "Poor"))
  )
# Build the table used by every plot below: keep only the site, status and
# problem columns, give them display-ready names, and turn the categorical
# site columns into factors.
tree_status_df <- trees_2015 %>%
  # Select and rename in one step (new name = old name).
  select(
    Borough = borough,
    `Curb Location` = curb_loc,
    Guards = guards,
    `Sidewalk Condition` = sidewalk,
    Health = health,
    `Alive, Dead, or Stump` = status,
    `Roots with Stones` = root_stone,
    `Roots in Grate` = root_grate,
    `Root Problem (Other)` = root_other,
    `Trunk has Wires` = trunk_wire,
    `Trunk has Lights` = trnk_light,
    `Trunk Problem (Other)` = trnk_other,
    `Branches have Lights` = brch_light,
    `Branches with Shoes` = brch_shoe,
    `Branches Problem (Other)` = brch_other
  ) %>%
  mutate(
    Health = fct_relevel(Health, c("Good", "Fair", "Poor")),
    # Recode the run-together raw labels into readable ones; any value not
    # listed becomes NA because no `.default` is supplied.
    `Curb Location` = as.factor(
      case_match(
        `Curb Location`,
        "OffsetFromCurb" ~ "Offset From Curb",
        "OnCurb" ~ "On Curb"
      )
    ),
    `Sidewalk Condition` = as.factor(
      case_match(
        `Sidewalk Condition`,
        "NoDamage" ~ "No Damage",
        "Damage" ~ "Damage"
      )
    ),
    across(c(Borough, Guards, `Alive, Dead, or Stump`), as.factor),
    # Constant single-level factor so the whole city can be used as one group.
    `All of NYC` = as.factor("All")
  ) %>%
  # Put the two grouping columns first: Borough, then All of NYC.
  relocate(`All of NYC`, .after = Borough)
# Column names offered as exposure/outcome choices: every column except the
# first two (Borough and All of NYC). `tail(x, -2)` drops the first two names
# and returns an empty vector when fewer than three columns exist, whereas
# `3:length(x)` would count backwards in that case.
exposure_options <- tail(colnames(tree_status_df), -2)
# The outcome choices are the same set of columns.
outcome_options <- exposure_options
# Define function for plot
#
# Draws a dodged bar chart of the percentage of each outcome category within
# each exposure category, and reports a chi-square test in the subtitle.
#
# Arguments:
#   df           Data frame holding both columns.
#   exposure_str Name (string) of the column shown on the x axis.
#   outcome_str  Name (string) of the column used for the bar fill.
#
# Returns: a ggplot object. Rows where either column is NA are excluded from
# both the test and the plot.
tree_attribute_plot <- function(df, exposure_str, outcome_str) {
  # Convert character strings to symbols
  exposure_sym <- rlang::sym(exposure_str)
  outcome_sym <- rlang::sym(outcome_str)

  # Count each observed exposure/outcome combination once; the same counts
  # feed both the test and the plot.
  cell_counts <- df %>%
    ungroup() %>%
    filter(!is.na(!!exposure_sym), !is.na(!!outcome_sym)) %>%
    count(!!exposure_sym, !!outcome_sym)

  # Make contingency table for the chi-square test (rows = outcome levels,
  # columns = exposure levels). Combinations that never occur must be 0, not
  # NA: chisq.test() rejects tables containing missing values.
  contingency_table <- cell_counts %>%
    pivot_wider(
      names_from = !!exposure_sym,
      values_from = n,
      values_fill = 0L
    )

  # Perform the chi-square test on the counts only (drop the outcome labels).
  # NOTE: per the chisq.test() documentation, a table with a single row or
  # column (e.g. a one-level exposure) is treated as a goodness-of-fit test
  # against equal proportions rather than a test of independence.
  chisq_test_result <- contingency_table %>%
    select(-all_of(outcome_str)) %>%
    chisq.test()
  chi_square <- chisq_test_result$statistic
  p_value <- chisq_test_result$p.value

  # Percent of each outcome within each exposure category.
  percent_df <- cell_counts %>%
    group_by(!!exposure_sym) %>%
    mutate(Percent = n / sum(n) * 100) %>%
    ungroup()

  # Visualize the distribution differences
  ggplot(
    percent_df,
    aes(x = !!exposure_sym, y = Percent, fill = !!outcome_sym)
  ) +
    geom_col(position = "dodge") +
    geom_text(
      aes(label = sprintf("%.1f%%", Percent)),
      position = position_dodge(width = 0.9),
      size = 3.5,
      vjust = -0.3
    ) +
    labs(
      title = str_c(outcome_str, " by ", exposure_str),
      subtitle = sprintf(
        "p value = %.1e (Chi-square = %.2f)", p_value, chi_square
      )
    ) +
    theme(
      legend.position = "bottom",
      text = element_text(size = 14)
    )
}
The section below explores how a tree's surroundings and condition (curb location, tree guards, sidewalk condition, or overall health) relate to other tree attributes, such as:
Health
Root health (whether they have stones, are in a grate, or have other problems)
Trunk health (whether it has wires, lights, or other abnormalities)
Branch details (lights or other abnormalities)
Whether the tree is alive, dead, or a stump.
The figure includes the percentages of the different distribution categories, a visualization of these relative percentages, and the results of a chi-square test to determine whether the distributions were significantly different between the conditions explored.
# Each outcome broken down by curb location.
exposure <- "Curb Location"
tree_attribute_plot(tree_status_df, exposure, "Health")
tree_attribute_plot(tree_status_df, exposure, "Roots with Stones")
tree_attribute_plot(tree_status_df, exposure, "Roots in Grate")
tree_attribute_plot(tree_status_df, exposure, "Root Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Wires")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Lights")
tree_attribute_plot(tree_status_df, exposure, "Trunk Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches have Lights")
tree_attribute_plot(tree_status_df, exposure, "Branches Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Alive, Dead, or Stump")
# Each outcome broken down by sidewalk condition.
exposure <- "Sidewalk Condition"
tree_attribute_plot(tree_status_df, exposure, "Health")
tree_attribute_plot(tree_status_df, exposure, "Roots with Stones")
tree_attribute_plot(tree_status_df, exposure, "Roots in Grate")
tree_attribute_plot(tree_status_df, exposure, "Root Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Wires")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Lights")
tree_attribute_plot(tree_status_df, exposure, "Trunk Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches have Lights")
tree_attribute_plot(tree_status_df, exposure, "Branches Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Alive, Dead, or Stump")
# Each outcome broken down by tree guard status.
exposure <- "Guards"
tree_attribute_plot(tree_status_df, exposure, "Health")
tree_attribute_plot(tree_status_df, exposure, "Roots with Stones")
tree_attribute_plot(tree_status_df, exposure, "Roots in Grate")
tree_attribute_plot(tree_status_df, exposure, "Root Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Wires")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Lights")
tree_attribute_plot(tree_status_df, exposure, "Trunk Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches have Lights")
tree_attribute_plot(tree_status_df, exposure, "Branches Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Alive, Dead, or Stump")
# Root, trunk and branch problems broken down by tree health.
exposure <- "Health"
tree_attribute_plot(tree_status_df, exposure, "Roots with Stones")
tree_attribute_plot(tree_status_df, exposure, "Roots in Grate")
tree_attribute_plot(tree_status_df, exposure, "Root Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Wires")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Lights")
tree_attribute_plot(tree_status_df, exposure, "Trunk Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches have Lights")
tree_attribute_plot(tree_status_df, exposure, "Branches Problem (Other)")
The section below shows the overall distribution of different tree surroundings, qualities, attributes, and health. It can be explored by borough or as an overall summary of the entire city.
# Tree health broken down by borough.
exposure <- "Borough"
tree_attribute_plot(tree_status_df, exposure, "Health")
This figure shows the distribution of different attributes of trees and their status across all of NYC. For example, we see that 81.1% of trees are in “good” health, 14.8% of trees are in “fair” health, and 4.1% of trees are in “poor” health. Notably, 28.7% of sidewalks are damaged (71.2% not damaged). The majority of trees (87.8%) do not have guards and 20.5% of tree roots have stones.
# Remaining attributes broken down by borough (uses the current `exposure`).
tree_attribute_plot(tree_status_df, exposure, "Sidewalk Condition")
tree_attribute_plot(tree_status_df, exposure, "Curb Location")
tree_attribute_plot(tree_status_df, exposure, "Guards")
tree_attribute_plot(tree_status_df, exposure, "Roots with Stones")
tree_attribute_plot(tree_status_df, exposure, "Roots in Grate")
tree_attribute_plot(tree_status_df, exposure, "Root Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Wires")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Lights")
tree_attribute_plot(tree_status_df, exposure, "Trunk Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches have Lights")
tree_attribute_plot(tree_status_df, exposure, "Branches Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches with Shoes")
# Absolute number (not percentage) of trees with shoes in their branches,
# per borough.
tree_status_df %>%
  filter(`Branches with Shoes` == "Yes") %>%
  count(Borough, `Branches with Shoes`) %>%
  ggplot(aes(x = Borough, y = n, fill = Borough)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of Trees with Shoes in Branches",
    x = "Borough",
    y = "Number of Trees"
  )
tree_attribute_plot(tree_status_df, exposure, "Alive, Dead, or Stump")
# City-wide distribution of each attribute (single "All" group).
exposure <- "All of NYC"
tree_attribute_plot(tree_status_df, exposure, "Health")
tree_attribute_plot(tree_status_df, exposure, "Sidewalk Condition")
tree_attribute_plot(tree_status_df, exposure, "Curb Location")
tree_attribute_plot(tree_status_df, exposure, "Guards")
tree_attribute_plot(tree_status_df, exposure, "Roots with Stones")
tree_attribute_plot(tree_status_df, exposure, "Roots in Grate")
tree_attribute_plot(tree_status_df, exposure, "Root Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Wires")
tree_attribute_plot(tree_status_df, exposure, "Trunk has Lights")
tree_attribute_plot(tree_status_df, exposure, "Trunk Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches have Lights")
tree_attribute_plot(tree_status_df, exposure, "Branches Problem (Other)")
tree_attribute_plot(tree_status_df, exposure, "Branches with Shoes")
tree_attribute_plot(tree_status_df, exposure, "Alive, Dead, or Stump")
For example, we see that 81.1% of trees are in “good” health, 14.8% of trees are in “fair” health, and 4.1% of trees are in “poor” health. Notably, 28.7% of sidewalks are damaged (71.2% not damaged). The majority of trees (87.8%) do not have guards and 20.5% of tree roots have stones.
Compared to the other four boroughs, Manhattan had the lowest percentage of trees in “good” health, and the highest percentage of trees in both “fair” and “poor” health. Overall in NYC, 81.1% of trees were in “good” health, 14.8% of trees were in “fair” health, and 4.1% were in “poor” health.