May the Forest be With Us: Distribution of NYC Trees

Tree Data Cleaning

trees_2015 =
  read_csv("large_tree_data/2015_tree_raw.csv", na = c("", "NA", "Unknown")) |> 
  janitor::clean_names() |> 
  mutate(spc_common = str_to_title(spc_common)) |> 
  mutate(health = fct_relevel(health, c("Good", "Fair", "Poor")))

Tree Count

Trees in Each Borough

This plot summarizes the total number of trees in each borough according to the data collected within the 2015 Street Tree Census. Queens has the largest number of trees, which is nearly 4 times the amount in Manhattan, which has the least number of trees.

pop_race_2010 = readxl::read_excel("small_data/pop_race2010_nta.xlsx", skip = 6, col_names = FALSE, na = "NA") |> 
  rename(
   "borough" = "...1" ,
   "census_FIPS" = "...2",
   "nta" = "...3",
   "nta_name" = "...4",
   "total_population" = "...5",
   "white_nonhis" = "...6",
   "black_nonhis" = "...7",
   "am_ind_alaska_nonhis" = "...8",
   "asian_nonhis" = "...9",
   "hawaii_pac_isl_nonhis" = "...10",
   "other_nonhis" = "...11",
   "two_races" = "...12",
   "hispanic_any_race" = "...13"
  ) |> 
  drop_na() |> 
  select(- borough, -nta_name)

all_data = left_join(trees_2015, pop_race_2010, by = "nta")

number_tree = 
  all_data |> 
  group_by(borough) |> 
  summarize(n_trees = n_distinct(tree_id)) |> 
  mutate(borough = fct_reorder(borough, n_trees))

ggplot(data = number_tree, aes(x = borough, y = n_trees, fill = borough)) + geom_bar(stat = 'identity') + geom_text(aes(label = n_trees), size = 3, vjust = -1) + 
  labs(
    title = "Number of Trees in each Borough",
    x = "Borough",
    y = "Total Number of Trees"
  )

Manhattan

Now looking at the Manhattan borough, the plot below shows the breakdown of number of trees per NTA and the total population per 10000 people. By looking at these values side-by-side, it is notable that the Upper West Side is the only NTA that has a human population (per 10000) that surpassed the number of trees. It also has the largest number of trees. The data used to generate this plot is from merging the 2015 Street Tree Census and the 2010 US Census Population data sets.

mhtn_number_tree = 
  all_data |> 
  filter(borough == "Manhattan") |> 
  group_by(nta_name) |> 
  summarize(n_trees = n_distinct(tree_id),
            n_population = (sum(total_population)/100000)) |> 
  pivot_longer(
    n_trees:n_population,
    names_to = "type",
    names_prefix = "n_",
    values_to = "total"
  ) |> 
   mutate(nta_name = fct_reorder(nta_name, total))
  

ggplot(mhtn_number_tree, aes(x = nta_name, y = total, fill = type)) + geom_bar(stat = 'identity', position = 'dodge')+ theme(axis.text.x = element_text(angle = 65, hjust=1)) + 
  labs(
    title = "Number of Trees in Manhattan by NTA",
    x = "Neighborhood Tabulation Area",
    y = "Total Number of Trees"
  )

Neighborhoods

acres_raw <- 
  read_excel("small_data/t_pl_p5_nta.xlsx", 
             range = "A9:J203", 
             col_names = c("borough", "county_code", "nta", "nta_name",
                           "total_pop_2000", "total_pop_2010", "pop_change_num",
                           "pop_change_per", "total_acres", "persons_per_acre")
             ) |> 
  janitor::clean_names()

acres_sub <- acres_raw |> 
  select(nta_name, total_acres)

trees_per_nta <- trees_2015 |> 
  select(nta_name, nta, borough, status) |> 
  filter(status == "Alive") |> 
  count(nta_name, borough) |> 
  rename(num_trees = n)

trees_and_acres <- left_join(trees_per_nta, acres_sub, by = "nta_name")

num_missing_nta <- sum(is.na(trees_and_acres$total_acres))

trees_per_acre_df <- trees_and_acres |> 
  filter(!is.na(total_acres)) |> 
  mutate(trees_per_acre = num_trees/total_acres) |> 
  arrange(desc(trees_per_acre))

trees_per_acre_nta_table <- trees_per_acre_df |> 
  filter(min_rank(desc(trees_per_acre)) < 11) |> 
  knitr::kable(digits = 2, caption = "Top 10 NYC Neighborhoods with the Highest Number of Alive Trees per Acre")

trees_per_acre_nta_table

Top 10 NYC Neighborhoods with the Highest Number of Alive Trees per Acre
nta_name	borough	num_trees	total_acres	trees_per_acre
Upper East Side-Carnegie Hill	Manhattan	4540	460.66	9.86
Central Harlem South	Manhattan	2581	331.39	7.79
Brooklyn Heights-Cobble Hill	Brooklyn	1718	235.86	7.28
Upper West Side	Manhattan	5723	791.29	7.23
Fordham South	Bronx	1002	144.63	6.93
Windsor Terrace	Brooklyn	2227	322.38	6.91
Yorkville	Manhattan	2133	319.14	6.68
Gramercy	Manhattan	1125	171.71	6.55
Auburndale	Queens	5119	785.35	6.52
East New York (Pennsylvania Ave)	Brooklyn	2892	446.05	6.48

Comments:

Upper East Side-Carnegie Hill in Manhattan has the greatest number of alive trees per acre.
6 NTAs from the 2015 Tree Census were missing acreage information, so these NTAs were removed for the purposes of this table.

Highest Number of Trees

trees_total_nta_table <- trees_per_acre_df |> 
  filter(min_rank(desc(num_trees)) < 11) |> 
  knitr::kable(digits = 2, caption = "Top 10 NYC Neighborhoods with the Highest Total Number of Alive Trees")

trees_total_nta_table

Top 10 NYC Neighborhoods with the Highest Total Number of Alive Trees
nta_name	borough	num_trees	total_acres	trees_per_acre
Rossville-Woodrow	Staten Island	8843	1488.90	5.94
Forest Hills	Queens	7330	1328.22	5.52
Bayside-Bayside Hills	Queens	9386	1857.24	5.05
Great Kills	Staten Island	10267	2076.96	4.94
Whitestone	Queens	7253	1584.85	4.58
Georgetown-Marine Park-Bergen Beach-Mill Basin	Brooklyn	7214	1662.88	4.34
Annadale-Huguenot-Prince’s Bay-Eltingville	Staten Island	12530	3292.86	3.81
East New York	Brooklyn	9175	2665.73	3.44
Charleston-Richmond Valley-Tottenville	Staten Island	7913	3432.93	2.31
New Springville-Bloomfield-Travis	Staten Island	8142	7083.27	1.15

Comments:

Rossville-Woodrow in Staten Island has the greatest total number of alive trees.
It is important to note while these neighborhoods have a high total number of alive trees, the number of alive trees per acre is low.
6 NTAs from the 2015 Tree Census were missing acreage information, so these NTAs were removed for the purposes of this table.

Total Trees per NTA

The map below visualizes the spatial distribution of total street trees per neighborhood tabulation area (NTA). The neighborhoods that appear to have the lowest number of street trees are lower Manhattan and the North Bronx. The neighborhoods that appear to be in the highest band of tree counts, ranging from over 5000 to over 12000, are in southern Staten Island, Southeast Brooklyn, and across Queens.

nyc_nta = st_read("small_data/geo_export_e924b274-8b6e-427d-87c0-92bfde8ce30a.shp",
                  quiet = TRUE)

nta_dbh_summarized = trees_2015 |> 
  drop_na(tree_dbh) |> 
  group_by(nta) |> 
  summarize(
    trees_per_nta = n(),
    avg_dbh = mean(tree_dbh))


nta_dbh_spatial = merge(nyc_nta, nta_dbh_summarized, 
                        by.x = "ntacode",
                        by.y = "nta")

tm_shape(nta_dbh_spatial) +
  tm_polygons(col = "trees_per_nta",
              style = "quantile",
              n = 5,
              palette = "Greens",
              border.col = "black",
              title = "Number of Trees per NTA") +
  tm_layout(main.title = "Total Tree Count Across NYC Neighborhoods", main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.position = c("left","center"),
            legend.text.size = 0.52)

It is also important to consider that NTAs do not have standardized area, and some NTAs are dramatically larger than others and not all area is equally accommodating to planting street trees.To better understand tree density, we calculated a unit of trees per acre so that this value could be averaged across neighborhoods and account for area differences across NYC boroughs.

Tree Species

Tree Species Distribution

top_trees_nyc <- trees_2015 %>% 
  drop_na(spc_common) %>%   
  group_by(spc_common) %>%
  count(spc_common) %>% 
  arrange(desc(n)) %>% 
  head(n = 10) %>% 
  pull(spc_common)

 trees_2015 %>% 
    drop_na(spc_common, borough) %>% 
    group_by(spc_common, borough) %>%
    count(spc_common) %>% 
    filter(spc_common %in% top_trees_nyc) %>% 
    pivot_wider(names_from = borough, values_from = n) %>% 
    mutate(`All Boroughs Average` = mean(Bronx:`Staten Island`)) %>% 
    select(spc_common, `All Boroughs Average`, everything()) %>% 
    pivot_longer(cols = `All Boroughs Average`:`Staten Island`) %>% 
    ggplot(aes(x = reorder(spc_common, value), y = value, fill = spc_common)) +
    geom_bar(stat = "identity") +
    facet_wrap(~ name) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(x = "Tree Species", 
         y = "Number of Trees", 
         fill = "Tree Species",
         title = "Top Ten Trees Overall and in Each NYC Borough")

Comments:

The most common tree in NYC overall is the Callery Pear.
The Callery Pear is also the most common tree in Manhattan and Staten Island.
The most common tree species in both Brooklyn and Queens is the London Planetree, and the Honeylocust is the most common tree in the Bronx.

Top Tree Species in Each Borough

trees_2015 %>%
  drop_na() %>%
  group_by(borough, spc_common) %>%
  count() %>%
  arrange(borough, desc(n)) %>%
  group_by(borough) %>%
  slice_max(n = 10, order_by = n) %>% 
  select(-n) %>% 
  group_by(borough) %>%
  mutate(row_num = row_number()) %>%
  pivot_wider(names_from = borough, values_from = spc_common) %>% 
  unnest(cols = c(Bronx, Brooklyn, Manhattan, Queens, `Staten Island`)) %>% 
  select(row_num, everything()) %>% 
  knitr::kable(caption = "Top 10 Tree Species in Each Borough")

Top 10 Tree Species in Each Borough
row_num	Bronx	Brooklyn	Manhattan	Queens	Staten Island
1	Honeylocust	London Planetree	Honeylocust	London Planetree	Callery Pear
2	London Planetree	Honeylocust	Callery Pear	Pin Oak	London Planetree
3	Pin Oak	Pin Oak	Ginkgo	Honeylocust	Red Maple
4	Callery Pear	Japanese Zelkova	Pin Oak	Norway Maple	Pin Oak
5	Japanese Zelkova	Callery Pear	Sophora	Callery Pear	Cherry
6	Cherry	Littleleaf Linden	London Planetree	Cherry	Sweetgum
7	Littleleaf Linden	Norway Maple	Japanese Zelkova	Littleleaf Linden	Honeylocust
8	Norway Maple	Sophora	Littleleaf Linden	Japanese Zelkova	Norway Maple
9	Ginkgo	Cherry	American Elm	Green Ash	Silver Maple
10	Sophora	Ginkgo	American Linden	Silver Maple	Maple

Top Species in Queens

After discovering that Queens has the greatest number of trees, a closer look was taken to evaluate which species are most prevalent. Due to the vast number of species populating Queens, the below visualization represents the counts of each species with counts up to 500 and thus showing which species are most rare.

queens_rare_trees = 
  all_data |> 
  filter(borough == "Queens") |>
  count(spc_common) |> 
  filter(n < 500) |> 
  mutate(spc_common = fct_reorder(spc_common, n))

ggplot(data = queens_rare_trees, aes(x = spc_common, y = n, fill = spc_common)) + geom_bar(stat = 'identity') + theme(legend.position = "none", axis.text.x = element_text(angle = 65, hjust=1)) + 
  labs(
    title = "Species of Trees in Queens (n < 500)",
    x = "Species (common name)",
    y = "Total Number of Trees"
  )

This plot is a continuation of the trends analyzed in the previous visualization. Instead of the counts of each species being less than 500, this plot shows the counts of each species with a count over 500.

queens_prev_trees = 
  all_data |> 
  drop_na() |> 
  filter(borough == "Queens") |>
  count(spc_common) |> 
  filter(n > 500) |> 
  mutate(spc_common = fct_reorder(spc_common, n))

ggplot(data = queens_prev_trees, aes(x = spc_common, y = n, fill = spc_common)) + geom_bar(stat = 'identity') + theme(legend.position = "none", axis.text.x = element_text(angle = 65, hjust=1)) + 
  labs(
    title = "Species of Trees in Queens (n > 500)",
    x = "Species (common name)",
    y = "Total Number of Trees"
  )

Tree DBH

Tree DBH Defined

Understanding DBH broadly:

DBH, or diameter at breast height, is a standardized measurement of tree growth that is used by foresters as a baseline unit for understanding tree age, size, and total overall biomass. Depending on the ecological characteristics of a tree species, DBH measurements can be used to calculate estimates of above ground biomass (wood and branch cover) and below ground biomass, as well as carbon storage. With the limited information this 2015 street survey provides about DBH and tree species, we will primarily be using DBH as a proxy for the size, age, and maturity of trees across NYC neighborhoods.
Roughly, a lower average DBH represents a greater distribution of smaller and younger trees than a higher average DBH, which has a greater number of very large, mature trees. The estimation of tree age by DBH varies widely by species, due to the differential growth factors of each tree species. Available literature and online tree calculators show that across species, a DBH of around 6 inches can range from 10 - 25 years of tree growth, while a DBH of around 14 - 15 inches ranges from 35 - 60 years of tree growth. It is likely that a tree over 40 - 50 inches has seen close to or over a century of growth.

Links to learn more:

Neighborhoods with Highest Average DBH

trees_dbh = trees_2015 |> 
  drop_na(tree_dbh) |> 
  group_by(borough, nta_name) |> 
  summarize(
    n_trees = n(),
    avg_dbh = mean(tree_dbh)) |> 
  arrange(desc(avg_dbh))

trees_dbh |> 
  ungroup() |> 
  filter(n_trees > 100) |> 
  filter(min_rank(desc(avg_dbh)) < 11) |> 
  knitr::kable(digits = 2, caption= "Top 10 Neighborhoods with the Highest Average DBH (inches) Across Local Street Trees")

Top 10 Neighborhoods with the Highest Average DBH (inches) Across Local Street Trees
borough	nta_name	n_trees	avg_dbh
Queens	Kew Gardens	2024	15.56
Queens	Woodhaven	4254	15.41
Brooklyn	Flatlands	5589	15.40
Staten Island	New Dorp-Midland Beach	5452	15.24
Brooklyn	East Flatbush-Farragut	3008	15.01
Brooklyn	Georgetown-Marine Park-Bergen Beach-Mill Basin	7442	15.01
Queens	South Ozone Park	7321	14.97
Queens	Jamaica Estates-Holliswood	4254	14.73
Queens	Oakland Gardens	6059	14.73
Queens	Auburndale	5332	14.59

Comments:

Small NTA regions that had high average DBH but low tree counts of less than 100 were filtered out of this table.
Averages around 15 inches for all 10 neighborhoods, with multiple thousands of trees within their sample size.
6/10 highest in Queens, no representation of Manhattan or Bronx.

Distribution of DBH in Each Borough

borough_dbh = trees_2015 |> 
  drop_na(tree_dbh) |> 
  group_by(borough) |> 
  mutate(borough_dbh = mean(tree_dbh))

borough_dbh |> 
  filter(tree_dbh < 101) |> 
  plot_ly(y = ~tree_dbh, color = ~borough, type = "box", colors = "viridis") |> 
  layout(title = "Distribution of DBH in Each Borough",
         xaxis = list(title = 'Borough'),
         yaxis = list(title = 'Measured DBH (inches)'),
        showlegend = FALSE)

Comments:

All boroughs had outlier trees with DBH nearing 90-100 inches. Only 70 trees in the over 600,000 surveyed sample had a DBH over 100 inches and were excluded for better visual clarity.
Bronx has the lowest median tree DBH, but Manhattan has the narrowest interquartile arrange around low DBH’s of 4 - 11 inches.
Brooklyn and Queens have the highest interquartile range of DBH - with Q1 being 18 inches for both - highest amount of older growth trees.

borough_dbh |> 
  filter(tree_dbh < 50) |> 
  ggplot(aes(x = tree_dbh, fill = borough)) + 
  geom_density() + facet_grid(borough ~ .) +
  scale_x_continuous(breaks = scales::pretty_breaks(12)) +
  labs(
    title = "Density Plot of All Measured Tree Diameters Below 50 inches",
    x = "Measured DBH (Inches)",
    y = "Density"
  )

Comments:

Young Adult trees with DBH between 3 - 7 inches most common across all boroughs.
All tree DBH distributions are right skewed across Boroughs.
Queens and Brooklyn have wider tails accounting for trees with DBH over 20 inches.
Manhattan and Bronx have peaks where about 10% of all trees are less than 5 inches in DBH, these peaks are lower around 7 - 8% for Queens and Brooklyn respectively.
Staten Island closest to bimodal distribution.

Tree DBH Mapped

tm_shape(nta_dbh_spatial) +
  tm_polygons(col = "avg_dbh",
              style = "quantile",
              n = 5,
              palette = "YlGn",
              border.col = "black",
              title = "Mean Tree DBH (Inches)") +
  tm_layout(main.title = "Average Tree DBH Across NYC Neighborhoods",
            main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.position = c("left","center"),
            legend.text.size = 0.52)

Comments:

Highest DBH clusters between 13 and 15 inches are in easternmost Queens and South Brooklyn, some of the most residential and spacious neighborhoods of the city. Suggests these neighborhoods have the largest percentage of significantly older trees that have been growing for decades.
Lowest DBH neighborhoods across downtown Manhattan and the Bronx - youngest and smallest trees. May have more routine plantings of young trees - and also limited curb space that stunts growth.

Tree Health Status

Below are some visualizations of all 5 boroughs of New York by Zip Code, colored by tree health. This data was collected from the 2015 Street Tree Census. The decision to use Zip Codes for these maps was made due to the availability of a shape file of New York with lines to separate by Zip Code.

NYC Overall

zip_nyc = st_read("small_data/MODZCTA_2010.shp",
                  quiet = TRUE)

trees_df = 
  trees_2015 |> 
  drop_na() |> 
  group_by(postcode, borough, health) |> 
  summarize(
    n_trees = n())

trees_zip =
  merge(zip_nyc, trees_df, 
                        by.x = "MODZCTA",
                        by.y = "postcode") |> 
  janitor::clean_names()

trees_zip |> 
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes in NYC",
            main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.position = c("left","center"),
            legend.text.size = 0.52)

Health Status By Borough

The following 5 visualizations are closer visualizations of tree health status in each borough.

Manhattan

trees_zip |> 
  filter(borough == "Manhattan") |> 
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes
            in Manhattan",
            main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.text.size = 0.52)

Brooklyn

trees_zip |> 
  drop_na() |> 
  filter(borough == "Brooklyn") |> 
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes in Brooklyn",
             main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.text.size = 0.52)

Queens

trees_zip |> 
  drop_na() |> 
  filter(borough == "Queens") |>
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes in Queens",
             main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.text.size = 0.52)

The Bronx

trees_zip |> 
  drop_na() |> 
  filter(borough == "Bronx") |>
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes in Bronx",
             main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.text.size = 0.52)

Staten Island

trees_zip |> 
  drop_na() |> 
  filter(borough == "Staten Island") |>
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes in Staten Island",
             main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.text.size = 0.52)

Washington Heights

Tree Health Map

The map below is the tree health across Washington Heights by Zip Codes.

trees_df_nta = 
  trees_2015 |> 
  drop_na() |> 
  group_by(nta_name, postcode, health) |> 
  summarize(
    n_trees = n())

trees_nta = 
  merge(zip_nyc, trees_df_nta, 
                        by.x = "MODZCTA",
                        by.y = "postcode") |> 
  janitor::clean_names()

trees_nta |> 
  drop_na() |> 
  filter(
         nta_name %in% c("Washington Heights South", "Washington Heights North")) |>
  tm_shape() +   
  tm_polygons(col = "health",
              style = "equal",
              n = 3,
              palette = "viridis",
              border.col = "black",
              title = "Tree Health Status")+
  tm_layout(main.title = "Tree Heath by Zip Codes 
            in Washington Heights",
             main.title.size = 1,
            frame = FALSE) +
  tm_legend(legend.text.size = 0.52)

Interactive Tree Scatter Plot

This plot is an interactive scatter plot of every tree included in Washington Heights included in the 2015 Street Tree Census data set. The colors indicate the health of the tree and when hovering over a certain point, the latitude, longitude, and species are shown.

trees_2015 |>
  drop_na() |> 
  filter(
         nta_name %in% c("Washington Heights South", "Washington Heights North")) |>
   plot_ly(color = ~health, colors = "viridis") |> 
  add_trace(
    type = "scattermapbox",
    mode = "markers",
    lon = ~longitude,
    lat = ~latitude,
    marker = list(size = 5),
    text = ~spc_common,
    below = "markers"
  ) |> 
  layout(
    title = "Tree Health in Washington Heights",
    mapbox = list(
      style = "carto-positron",  
      center = list(lon = -73.94, lat = 40.84), 
      zoom = 12
    ),
    showlegend = T
  )