The Concrete Forest: Tree Access Across Sociodemographic Groups

Tree Data Cleaning

# Load the raw 2015 tree data, reading blank, "NA" and "Unknown" entries as
# missing, then standardise the column names.
trees_2015 <-
  read_csv(
    "large_tree_data/2015_tree_raw.csv",
    na = c("", "NA", "Unknown")
  ) |>
  janitor::clean_names() |>
  mutate(
    # Title-case the common species names.
    spc_common = str_to_title(spc_common),
    # Put the health levels in the order Good, Fair, Poor.
    health = fct_relevel(health, c("Good", "Fair", "Poor"))
  )

Population

This plot shows the population breakdown of each New York City borough by the race and ethnicity categories included in the 2010 US Census. Non-Hispanic White residents are the largest group in every borough except the Bronx. Hawaiian and Pacific Islander residents are consistently the smallest group in all five boroughs.

# Read the 2010 population-by-race workbook without a header row (the first
# six rows are skipped), so readxl assigns positional names "...1" to "...13";
# these are then given descriptive names. Rows with any missing value are
# dropped.
pop_race_2010 <-
  readxl::read_excel(
    "small_data/pop_race2010_nta.xlsx",
    skip = 6,
    col_names = FALSE,
    na = "NA"
  ) |>
  rename(
    borough = `...1`,
    census_FIPS = `...2`,
    nta = `...3`,
    nta_name = `...4`,
    total_population = `...5`,
    white_nonhis = `...6`,
    black_nonhis = `...7`,
    am_ind_alaska_nonhis = `...8`,
    asian_nonhis = `...9`,
    hawaii_pac_isl_nonhis = `...10`,
    other_nonhis = `...11`,
    two_races = `...12`,
    hispanic_any_race = `...13`
  ) |>
  drop_na()

# Borough-level population totals per race/ethnicity category, in long format
# (one row per borough and category) for plotting.
dem_race <-
  pop_race_2010 |>
  # Keep only the columns to be totalled, renaming them to the plot labels.
  select(
    borough,
    white = white_nonhis,
    black = black_nonhis,
    asian = asian_nonhis,
    am_ind_alaska = am_ind_alaska_nonhis,
    hawaii_pac_island = hawaii_pac_isl_nonhis,
    other = other_nonhis,
    hispanic_all_races = hispanic_any_race,
    two_races
  ) |>
  group_by(borough) |>
  summarise(across(everything(), sum)) |>
  pivot_longer(
    cols = -borough,
    names_to = "race",
    values_to = "n"
  ) |>
  # Order the boroughs on the x axis by their population counts.
  mutate(borough = fct_reorder(borough, n))

# Dodged bar chart: one bar per race/ethnicity category within each borough.
dem_race |>
  ggplot(aes(x = borough, y = n, fill = race)) +
  geom_col(position = "dodge") +
  labs(
    title = "Population of Borough by Race",
    x = "Borough",
    y = "Total Population"
  )

# Drop the population table's own borough and NTA-name columns before joining
# it to the tree data.
pop_race_2010s <- select(pop_race_2010, !c(borough, nta_name))

# Attach the NTA-level population counts to every tree record, matching on the
# NTA code.
all_data <-
  trees_2015 |>
  left_join(pop_race_2010s, by = "nta")

# Number of trees and percent racial/ethnic composition for each borough.
#
# `all_data` holds one row per tree, with the NTA-level population counts
# repeated on every tree in that NTA. Summing those counts over tree rows
# would weight each NTA by how many trees it has, so the demographic totals
# are instead summed over one row per (borough, NTA). Tree counts still use
# every tree row.
borough_data <-
  all_data |>
  group_by(borough) |>
  summarize(n_trees = as.numeric(n_distinct(tree_id))) |>
  left_join(
    all_data |>
      # One row per NTA so each neighborhood's population is counted once.
      distinct(borough, nta, .keep_all = TRUE) |>
      group_by(borough) |>
      summarize(
        n_white = sum(white_nonhis) / sum(total_population) * 100,
        n_black = sum(black_nonhis) / sum(total_population) * 100,
        n_asian = sum(asian_nonhis) / sum(total_population) * 100,
        n_pacific_islander =
          sum(hawaii_pac_isl_nonhis) / sum(total_population) * 100,
        n_amer_ind = sum(am_ind_alaska_nonhis) / sum(total_population) * 100,
        n_other = sum(other_nonhis) / sum(total_population) * 100,
        n_hispanic = sum(hispanic_any_race) / sum(total_population) * 100,
        n_two_races = sum(two_races) / sum(total_population) * 100
      ),
    by = "borough"
  ) |>
  arrange(desc(n_trees))

borough_data |>
  knitr::kable(
    digits = 3,
    caption = "Trees and Population Demographics by Borough"
  )

Trees and Population Demographics by Borough
borough	n_trees	n_white	n_black	n_asian	n_pacific_islander	n_amer_ind	n_other	n_hispanic	n_two_races
Queens	250551	29.840	17.494	22.617	0.051	0.303	1.601	25.424	2.670
Brooklyn	177293	34.912	33.732	9.614	0.025	0.187	0.439	19.488	1.603
Staten Island	105318	71.685	6.017	6.866	0.030	0.122	0.190	13.858	1.233
Bronx	85203	12.830	30.513	3.339	0.029	0.256	0.643	51.208	1.184
Manhattan	65423	50.743	13.488	9.241	0.032	0.132	0.327	24.153	1.885

Comments:

This table lists the number of trees and the percent racial and ethnic composition within each borough.
Queens has the highest number of trees, as well as the highest percentages of Asian, Hawaiian and Pacific Islander, American Indian, and Two Races residents.

# Total number of distinct trees recorded in Manhattan.
number_mhtn <-
  all_data |>
  filter(borough == "Manhattan") |>
  summarise(n_trees = n_distinct(tree_id))

# Tree count and percent racial/ethnic composition for each Manhattan NTA,
# sorted from most to fewest trees.
percentages <-
  all_data |>
  filter(borough == "Manhattan") |>
  group_by(nta_name) |>
  summarise(
    n_trees = n_distinct(tree_id),
    n_white = sum(white_nonhis) / sum(total_population) * 100,
    n_black = sum(black_nonhis) / sum(total_population) * 100,
    n_asian = sum(asian_nonhis) / sum(total_population) * 100,
    n_pacific_islander =
      sum(hawaii_pac_isl_nonhis) / sum(total_population) * 100,
    n_amer_ind = sum(am_ind_alaska_nonhis) / sum(total_population) * 100,
    n_other = sum(other_nonhis) / sum(total_population) * 100,
    n_hispanic = sum(hispanic_any_race) / sum(total_population) * 100,
    n_two_races = sum(two_races) / sum(total_population) * 100
  ) |>
  mutate(n_trees = as.numeric(n_trees)) |>
  arrange(desc(n_trees))

knitr::kable(
  percentages,
  digits = 3,
  caption = "Manhattan Trees and Population Demographics by NTA"
)

Manhattan Trees and Population Demographics by NTA
nta_name	n_trees	n_white	n_black	n_asian	n_pacific_islander	n_amer_ind	n_other	n_hispanic	n_two_races
Upper West Side	5881	67.709	8.420	5.959	0.026	0.103	0.336	15.460	1.988
Upper East Side-Carnegie Hill	4673	87.648	1.093	4.986	0.020	0.029	0.214	4.630	1.379
West Village	3837	80.891	2.023	8.153	0.030	0.075	0.353	6.062	2.413
Central Harlem North-Polo Grounds	3515	5.770	67.073	1.731	0.027	0.340	0.259	22.831	1.969
Washington Heights South	2990	11.252	10.454	2.727	0.006	0.144	0.360	74.056	1.001
Hudson Yards-Chelsea-Flatiron-Union Square	2988	65.091	5.726	11.785	0.030	0.133	0.372	14.602	2.262
Morningside Heights	2751	46.041	13.623	13.342	0.054	0.188	0.363	23.521	2.870
Washington Heights North	2684	25.776	4.078	2.534	0.015	0.086	0.317	66.149	1.044
Central Harlem South	2670	16.085	55.877	3.541	0.060	0.256	0.408	21.080	2.695
East Harlem North	2591	6.784	35.549	3.044	0.016	0.221	0.319	52.743	1.325
Hamilton Heights	2407	10.897	32.246	2.199	0.031	0.245	0.367	52.193	1.822
Lenox Hill-Roosevelt Island	2327	75.313	4.421	10.609	0.076	0.071	0.324	7.258	1.929
Yorkville	2269	75.996	3.667	9.271	0.032	0.065	0.277	8.810	1.881
Turtle Bay-East Midtown	2253	77.053	2.091	12.990	0.041	0.045	0.359	5.772	1.649
SoHo-TriBeCa-Civic Center-Little Italy	2215	66.094	2.185	22.175	0.026	0.070	0.400	6.481	2.569
Lincoln Square	2065	73.351	4.407	11.248	0.023	0.094	0.285	8.647	1.945
East Harlem South	2014	17.395	24.571	8.293	0.095	0.166	0.376	47.492	1.611
Clinton	2008	56.427	6.253	15.007	0.068	0.153	0.394	19.347	2.352
Lower East Side	1943	22.552	10.871	24.900	0.018	0.195	0.262	39.571	1.632
Murray Hill-Kips Bay	1746	66.647	4.775	16.225	0.032	0.108	0.357	9.870	1.987
East Village	1575	65.452	3.949	14.863	0.050	0.145	0.412	12.378	2.751
Marble Hill-Inwood	1508	15.103	9.068	1.891	0.011	0.137	0.383	72.428	0.980
Chinatown	1474	16.339	4.776	63.872	0.023	0.079	0.157	13.419	1.336
Battery Park City-Lower Manhattan	1311	65.405	3.244	20.192	0.043	0.088	0.385	7.695	2.947
Midtown-Midtown South	1196	64.097	4.576	20.754	0.028	0.119	0.321	8.117	1.987
Gramercy	1172	73.685	3.298	13.363	0.036	0.068	0.275	7.228	2.047
Manhattanville	913	7.451	25.786	2.161	0.022	0.336	0.331	62.802	1.111
Stuyvesant Town-Cooper Village	447	74.113	3.568	12.647	0.029	0.109	0.299	6.675	2.561

Comments:

This table lists the number of trees and percent racial and ethnic composition within each NTA in Manhattan.
The table is sorted by number of trees in descending order; the three neighborhoods with the most trees all have a majority-white population.

Because NTAs differ in size, street tree counts were standardized by each NTA's acreage (trees per acre). Six NTAs from the 2015 Tree Census were missing acreage information and were excluded from these analyses.

Only alive trees were included in the poverty level and education analyses.

Correlation coefficients were calculated to gauge if a linear association exists between our x and y variables. To test the significance of a correlation coefficient, x and y variables must have a bivariate normal distribution. Since most of our x and y variables exhibited a skewed distribution (histograms not shown), we were unable to test the significance of the correlation coefficient at this time.

Poverty

Is there an association between the percentage of people whose income is below the poverty line and the number of trees per acre 1) across neighborhoods in NYC and 2) across neighborhoods in each borough?

All of NYC

When examining all of the neighborhoods in NYC, there is a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.0220).

# Read the NTA population/acreage sheet from a fixed cell range, supplying the
# column names explicitly.
acres_raw <-
  read_excel(
    "small_data/t_pl_p5_nta.xlsx",
    range = "A9:J203",
    col_names = c(
      "borough", "county_code", "nta", "nta_name",
      "total_pop_2000", "total_pop_2010", "pop_change_num",
      "pop_change_per", "total_acres", "persons_per_acre"
    )
  ) |>
  janitor::clean_names()

# Only the NTA name and its acreage are needed downstream.
acres_sub <- select(acres_raw, nta_name, total_acres)

# Count of trees with status "Alive" per NTA (and borough).
trees_per_nta <-
  trees_2015 |>
  filter(status == "Alive") |>
  count(nta_name, borough, name = "num_trees")

# Attach acreage to the tree counts by NTA name.
trees_and_acres <-
  trees_per_nta |>
  left_join(acres_sub, by = "nta_name")

# How many NTAs found no acreage match.
num_missing_nta <- sum(is.na(trees_and_acres[["total_acres"]]))

# Trees per acre for NTAs with known acreage, densest first.
trees_per_acre_df <-
  trees_and_acres |>
  filter(!is.na(total_acres)) |>
  mutate(trees_per_acre = num_trees / total_acres) |>
  arrange(desc(trees_per_acre))

# Neighborhood poverty export with cleaned column names.
poverty_raw <-
  read_csv("large_tree_data/NYC EH Data Portal - Neighborhood poverty.csv") |>
  janitor::clean_names()

# Keep the 2010-NTA rows for the 2013-17 period and rename the two columns
# used in the analysis.
poverty_clean <-
  poverty_raw |>
  filter(geo_type == "NTA2010", time == "2013-17") |>
  select(nta_name = geography, poverty_percent = percent)

# Trees per acre joined with percent below the poverty line, by NTA name.
trees_and_poverty <-
  trees_per_acre_df |>
  left_join(poverty_clean, by = "nta_name")

# Correlation between trees per acre and percent below the poverty line.
with(trees_and_poverty, cor(trees_per_acre, poverty_percent))

## [1] -0.02202705

# Interactive scatter plot, one marker per neighborhood.
trees_and_poverty |>
  plot_ly(
    x = ~poverty_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>%below poverty line: ", poverty_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent below the poverty line and trees per acre in NYC",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Poverty By Borough

The following tabs explore the association between percentage of people living below the poverty line and number of trees per acre.

Manhattan

When examining only neighborhoods in Manhattan, there is a weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.2111).

# Manhattan neighborhoods only.
manhattan_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Manhattan")

# Correlation between trees per acre and percent below the poverty line.
with(manhattan_trees_and_poverty, cor(trees_per_acre, poverty_percent))

## [1] -0.2110746

# Interactive scatter plot, one marker per Manhattan neighborhood.
manhattan_trees_and_poverty |>
  plot_ly(
    x = ~poverty_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>%below poverty line: ", poverty_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Manhattan",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Brooklyn

When examining only neighborhoods in Brooklyn, there remains a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.0027). This could indicate that a non-linear relationship may be more suitable; it is also possible that no association exists.

# Brooklyn neighborhoods only.
brooklyn_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Brooklyn")

# Correlation between trees per acre and percent below the poverty line.
with(brooklyn_trees_and_poverty, cor(trees_per_acre, poverty_percent))

## [1] -0.002666612

# Interactive scatter plot, one marker per Brooklyn neighborhood.
brooklyn_trees_and_poverty |>
  plot_ly(
    x = ~poverty_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>%below poverty line: ", poverty_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Brooklyn",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

The Bronx

When examining only neighborhoods in the Bronx, there is a moderate, positive, linear association between percentage of people living below the poverty line and number of trees per acre (r = 0.4558). It appears that as the percentage of people living below the poverty line increases, the number of trees per acre increases. These results are contrary to our expectations. This may indicate that other factors may be at play.

# Bronx neighborhoods only.
bronx_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Bronx")

# Correlation between trees per acre and percent below the poverty line.
with(bronx_trees_and_poverty, cor(trees_per_acre, poverty_percent))

## [1] 0.4557542

# Interactive scatter plot, one marker per Bronx neighborhood.
bronx_trees_and_poverty |>
  plot_ly(
    x = ~poverty_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>%below poverty line: ", poverty_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Bronx",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Staten Island

When examining only neighborhoods in Staten Island, there is a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.4646). As the percentage of people living below the poverty line increases, the number of trees per acre decreases.

# Staten Island neighborhoods only.
staten_island_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Staten Island")

# Correlation between trees per acre and percent below the poverty line.
with(staten_island_trees_and_poverty, cor(trees_per_acre, poverty_percent))

## [1] -0.4646225

# Interactive scatter plot, one marker per Staten Island neighborhood.
staten_island_trees_and_poverty |>
  plot_ly(
    x = ~poverty_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>%below poverty line: ", poverty_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Staten Island",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Queens

When examining only neighborhoods in Queens, there is a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.4384). As the percentage of people living below the poverty line increases, the number of trees per acre decreases.

# Queens neighborhoods only.
queens_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Queens")

# Correlation between trees per acre and percent below the poverty line.
with(queens_trees_and_poverty, cor(trees_per_acre, poverty_percent))

## [1] -0.4383884

# Interactive scatter plot, one marker per Queens neighborhood.
queens_trees_and_poverty |>
  plot_ly(
    x = ~poverty_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>%below poverty line: ", poverty_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Queens",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Summary

Summary: When examining all of the neighborhoods in NYC, there is a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre. When examining neighborhoods by borough, the association between percentage of people living below the poverty line and number of trees per acre varies. Notably, neighborhoods across Staten Island and Queens exhibit a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre; as the percentage of people living below the poverty line increases, the number of trees per acre decreases.

Education

Is there an association between the percentage of people who graduated high school and the number of trees per acre 1) across neighborhoods in NYC and 2) across neighborhoods in each borough?

All of NYC

When examining all of the neighborhoods in NYC, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.0940).

# High-school graduation export with cleaned column names.
education_raw <-
  read_csv("large_tree_data/NYC EH Data Portal - Graduated high school.csv") |>
  janitor::clean_names()

# Keep the 2010-NTA rows for the 2013-17 period and rename the two columns
# used in the analysis.
education_clean <-
  education_raw |>
  filter(geo_type == "NTA2010", time == "2013-17") |>
  select(nta_name = geography, graduated_hs_percent = percent)

# Trees per acre joined with percent graduated high school, by NTA name.
trees_and_education <-
  trees_per_acre_df |>
  left_join(education_clean, by = "nta_name")

# Correlation between trees per acre and percent graduated high school.
with(trees_and_education, cor(trees_per_acre, graduated_hs_percent))

## [1] 0.09403581

# Interactive scatter plot, one marker per neighborhood.
trees_and_education |>
  plot_ly(
    x = ~graduated_hs_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>% graduated HS: ", graduated_hs_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent graduated high school and trees per acre in NYC",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Education By Borough

The following tabs explore the association between the percentage of people who graduated high school and the number of trees per acre.

Manhattan

When examining only neighborhoods in Manhattan, there remains a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.2170).

# Manhattan neighborhoods only.
manhattan_trees_and_education <-
  filter(trees_and_education, borough == "Manhattan")

# Correlation between trees per acre and percent graduated high school.
with(manhattan_trees_and_education, cor(trees_per_acre, graduated_hs_percent))

## [1] 0.216967

# Interactive scatter plot, one marker per Manhattan neighborhood.
manhattan_trees_and_education |>
  plot_ly(
    x = ~graduated_hs_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>% graduated HS: ", graduated_hs_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent graduated high school and trees per acre in Manhattan",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Brooklyn

When examining only neighborhoods in Brooklyn, there remains a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.2240).

# Brooklyn neighborhoods only.
brooklyn_trees_and_education <-
  filter(trees_and_education, borough == "Brooklyn")

# Correlation between trees per acre and percent graduated high school.
with(brooklyn_trees_and_education, cor(trees_per_acre, graduated_hs_percent))

## [1] 0.223951

# Interactive scatter plot, one marker per Brooklyn neighborhood.
brooklyn_trees_and_education |>
  plot_ly(
    x = ~graduated_hs_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>% graduated HS: ", graduated_hs_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent graduated high school and trees per acre in Brooklyn",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

The Bronx

When examining only neighborhoods in the Bronx, there is a moderate, negative, linear association between the percentage of people who graduated high school and the number of trees per acre (r = -0.4767). It appears that as the percentage of people who graduated high school increases, the number of trees per acre decreases. These results are contrary to our expectations. This may indicate that other factors may be at play.

# Bronx neighborhoods only.
bronx_trees_and_education <-
  filter(trees_and_education, borough == "Bronx")

# Correlation between trees per acre and percent graduated high school.
with(bronx_trees_and_education, cor(trees_per_acre, graduated_hs_percent))

## [1] -0.4766586

# Interactive scatter plot, one marker per Bronx neighborhood.
bronx_trees_and_education |>
  plot_ly(
    x = ~graduated_hs_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>% graduated HS: ", graduated_hs_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent graduated high school and trees per acre in Bronx",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Staten Island

When examining only neighborhoods in Staten Island, there is a moderate, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.5839). As the percentage of people who graduated high school increases, the number of trees per acre increases.

# Staten Island neighborhoods only.
staten_island_trees_and_education <-
  filter(trees_and_education, borough == "Staten Island")

# Correlation between trees per acre and percent graduated high school.
with(
  staten_island_trees_and_education,
  cor(trees_per_acre, graduated_hs_percent)
)

## [1] 0.5839166

# Interactive scatter plot, one marker per Staten Island neighborhood.
staten_island_trees_and_education |>
  plot_ly(
    x = ~graduated_hs_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>% graduated HS: ", graduated_hs_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent graduated high school and trees per acre in Staten Island",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Queens

When examining only neighborhoods in Queens, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.1510).

# Queens neighborhoods only.
queens_trees_and_education <-
  filter(trees_and_education, borough == "Queens")

# Correlation between trees per acre and percent graduated high school.
with(queens_trees_and_education, cor(trees_per_acre, graduated_hs_percent))

## [1] 0.1510256

# Interactive scatter plot, one marker per Queens neighborhood.
queens_trees_and_education |>
  plot_ly(
    x = ~graduated_hs_percent,
    y = ~trees_per_acre,
    color = ~nta_name,
    colors = "viridis",
    type = "scatter",
    mode = "markers",
    text = ~paste(
      "neighborhood: ", nta_name,
      "<br>borough: ", borough,
      "<br>% graduated HS: ", graduated_hs_percent,
      "<br>trees per acre: ", trees_per_acre
    )
  ) |>
  layout(
    title = "Percent graduated high school and trees per acre in Queens",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Summary

Summary: When examining all of the neighborhoods in NYC, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.0940). When examining neighborhoods by borough, the association between the percentage of people who graduated high school and the number of trees per acre varies. Notably, neighborhoods across Staten Island exhibit a moderate, positive, linear association between percentage of people who graduated high school and the number of trees per acre; as the percentage of people who graduated high school increases, the number of trees per acre increases.