Tree Data Cleaning

# Read the raw 2015 tree data, treating empty strings, "NA", and "Unknown" as
# missing, then standardize the column names.
trees_2015 <-
  read_csv(
    "large_tree_data/2015_tree_raw.csv",
    na = c("", "NA", "Unknown")
  ) |>
  janitor::clean_names() |>
  mutate(
    # Title-case the common species names.
    spc_common = str_to_title(spc_common),
    # Put the health levels in the order Good, Fair, Poor.
    health = fct_relevel(health, c("Good", "Fair", "Poor"))
  )

Population

This plot shows the population of each New York City borough broken down by the race and ethnicity categories included in the 2010 US Census. White is the largest group in every borough except the Bronx, and Hawaiian and Pacific Islander is consistently the smallest group in all five boroughs.

# Read the 2010 population-by-race workbook, skipping its first six rows and
# letting readxl assign placeholder column names ("...1", "...2", ...). The
# first thirteen columns are then given descriptive names, and any row that
# still contains a missing value is dropped.
pop_race_2010 <-
  readxl::read_excel(
    "small_data/pop_race2010_nta.xlsx",
    skip = 6,
    col_names = FALSE,
    na = "NA"
  ) |>
  rename(all_of(c(
    borough = "...1",
    census_FIPS = "...2",
    nta = "...3",
    nta_name = "...4",
    total_population = "...5",
    white_nonhis = "...6",
    black_nonhis = "...7",
    am_ind_alaska_nonhis = "...8",
    asian_nonhis = "...9",
    hawaii_pac_isl_nonhis = "...10",
    other_nonhis = "...11",
    two_races = "...12",
    hispanic_any_race = "...13"
  ))) |>
  drop_na()

# Total each race/ethnicity count within a borough, reshape to one row per
# borough and category (columns `race` and `n`), and turn `borough` into a
# factor reordered by `n`.
dem_race <-
  pop_race_2010 |>
  group_by(borough) |>
  summarize(
    white = sum(white_nonhis),
    black = sum(black_nonhis),
    asian = sum(asian_nonhis),
    am_ind_alaska = sum(am_ind_alaska_nonhis),
    hawaii_pac_island = sum(hawaii_pac_isl_nonhis),
    other = sum(other_nonhis),
    hispanic_all_races = sum(hispanic_any_race),
    two_races = sum(two_races)
  ) |>
  pivot_longer(
    cols = white:two_races,
    names_to = "race",
    values_to = "n"
  ) |>
  mutate(borough = fct_reorder(borough, n))

# Dodged bar chart: boroughs on the x-axis, one bar per race/ethnicity group.
dem_race |>
  ggplot(aes(x = borough, y = n, fill = race)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Population of Borough by Race",
    x = "Borough",
    y = "Total Population"
  )

# Census counts without the borough and NTA-name columns, so only the NTA code
# and population columns are carried into the join.
pop_race_2010s <- select(pop_race_2010, -borough, -nta_name)

# Attach the census counts to every tree record by NTA code; trees whose NTA
# has no census row are kept with missing population values.
all_data <-
  trees_2015 |>
  left_join(pop_race_2010s, by = "nta")

# Number of trees and percent racial/ethnic composition for each borough,
# sorted from most to fewest trees.
#
# `all_data` has one row per tree with its NTA's census counts attached, so
# summing the population columns directly over it counts each NTA's residents
# once per tree and weights the percentages by tree count. The census counts
# are therefore reduced to one row per borough/NTA before they are summed.
# NTAs with no recorded trees do not appear in `all_data` and so are not part
# of the totals.
# NOTE(review): re-knit the report after this change; a table rendered from the
# tree-weighted version will show different percentages.
borough_data <-
  left_join(
    # Tree counts per borough.
    all_data |>
      group_by(borough) |>
      summarize(n_trees = as.numeric(n_distinct(tree_id))),
    # Population composition per borough, each NTA counted once.
    all_data |>
      distinct(
        borough, nta, total_population,
        white_nonhis, black_nonhis, asian_nonhis,
        hawaii_pac_isl_nonhis, am_ind_alaska_nonhis,
        other_nonhis, hispanic_any_race, two_races
      ) |>
      group_by(borough) |>
      summarize(
        n_white = sum(white_nonhis) / sum(total_population) * 100,
        n_black = sum(black_nonhis) / sum(total_population) * 100,
        n_asian = sum(asian_nonhis) / sum(total_population) * 100,
        n_pacific_islander =
          sum(hawaii_pac_isl_nonhis) / sum(total_population) * 100,
        n_amer_ind = sum(am_ind_alaska_nonhis) / sum(total_population) * 100,
        n_other = sum(other_nonhis) / sum(total_population) * 100,
        n_hispanic = sum(hispanic_any_race) / sum(total_population) * 100,
        n_two_races = sum(two_races) / sum(total_population) * 100
      ),
    by = "borough"
  ) |>
  arrange(desc(n_trees))

# Render the borough summary as a table rounded to three decimal places.
knitr::kable(
  borough_data,
  digits = 3,
  caption = "Trees and Population Demographics by Borough"
)
Trees and Population Demographics by Borough
borough n_trees n_white n_black n_asian n_pacific_islander n_amer_ind n_other n_hispanic n_two_races
Queens 250551 29.840 17.494 22.617 0.051 0.303 1.601 25.424 2.670
Brooklyn 177293 34.912 33.732 9.614 0.025 0.187 0.439 19.488 1.603
Staten Island 105318 71.685 6.017 6.866 0.030 0.122 0.190 13.858 1.233
Bronx 85203 12.830 30.513 3.339 0.029 0.256 0.643 51.208 1.184
Manhattan 65423 50.743 13.488 9.241 0.032 0.132 0.327 24.153 1.885

Comments:

  • This table lists the number of trees and the percent racial and ethnic composition within each borough.
  • Queens has the highest number of trees and the highest percentages of Asian, Hawaiian and Pacific Islander, American Indian, and Two Races populations.
# Number of distinct trees recorded in Manhattan.
number_mhtn <-
  summarize(
    filter(all_data, borough == "Manhattan"),
    n_trees = n_distinct(tree_id)
  )

# Tree count and percent racial/ethnic composition for each Manhattan NTA,
# sorted from most to fewest trees. Every tree row in an NTA carries that
# NTA's census counts, so the repeated rows cancel in each ratio (assuming
# each nta_name maps to a single NTA code).
percentages <-
  all_data |>
  filter(borough == "Manhattan") |>
  group_by(nta_name) |>
  summarize(
    n_trees = as.numeric(n_distinct(tree_id)),
    n_white = sum(white_nonhis) / sum(total_population) * 100,
    n_black = sum(black_nonhis) / sum(total_population) * 100,
    n_asian = sum(asian_nonhis) / sum(total_population) * 100,
    n_pacific_islander =
      sum(hawaii_pac_isl_nonhis) / sum(total_population) * 100,
    n_amer_ind = sum(am_ind_alaska_nonhis) / sum(total_population) * 100,
    n_other = sum(other_nonhis) / sum(total_population) * 100,
    n_hispanic = sum(hispanic_any_race) / sum(total_population) * 100,
    n_two_races = sum(two_races) / sum(total_population) * 100
  ) |>
  arrange(desc(n_trees))

# Render the Manhattan NTA summary as a table rounded to three decimal places.
knitr::kable(
  percentages,
  digits = 3,
  caption = "Manhattan Trees and Population Demographics by NTA"
)
Manhattan Trees and Population Demographics by NTA
nta_name n_trees n_white n_black n_asian n_pacific_islander n_amer_ind n_other n_hispanic n_two_races
Upper West Side 5881 67.709 8.420 5.959 0.026 0.103 0.336 15.460 1.988
Upper East Side-Carnegie Hill 4673 87.648 1.093 4.986 0.020 0.029 0.214 4.630 1.379
West Village 3837 80.891 2.023 8.153 0.030 0.075 0.353 6.062 2.413
Central Harlem North-Polo Grounds 3515 5.770 67.073 1.731 0.027 0.340 0.259 22.831 1.969
Washington Heights South 2990 11.252 10.454 2.727 0.006 0.144 0.360 74.056 1.001
Hudson Yards-Chelsea-Flatiron-Union Square 2988 65.091 5.726 11.785 0.030 0.133 0.372 14.602 2.262
Morningside Heights 2751 46.041 13.623 13.342 0.054 0.188 0.363 23.521 2.870
Washington Heights North 2684 25.776 4.078 2.534 0.015 0.086 0.317 66.149 1.044
Central Harlem South 2670 16.085 55.877 3.541 0.060 0.256 0.408 21.080 2.695
East Harlem North 2591 6.784 35.549 3.044 0.016 0.221 0.319 52.743 1.325
Hamilton Heights 2407 10.897 32.246 2.199 0.031 0.245 0.367 52.193 1.822
Lenox Hill-Roosevelt Island 2327 75.313 4.421 10.609 0.076 0.071 0.324 7.258 1.929
Yorkville 2269 75.996 3.667 9.271 0.032 0.065 0.277 8.810 1.881
Turtle Bay-East Midtown 2253 77.053 2.091 12.990 0.041 0.045 0.359 5.772 1.649
SoHo-TriBeCa-Civic Center-Little Italy 2215 66.094 2.185 22.175 0.026 0.070 0.400 6.481 2.569
Lincoln Square 2065 73.351 4.407 11.248 0.023 0.094 0.285 8.647 1.945
East Harlem South 2014 17.395 24.571 8.293 0.095 0.166 0.376 47.492 1.611
Clinton 2008 56.427 6.253 15.007 0.068 0.153 0.394 19.347 2.352
Lower East Side 1943 22.552 10.871 24.900 0.018 0.195 0.262 39.571 1.632
Murray Hill-Kips Bay 1746 66.647 4.775 16.225 0.032 0.108 0.357 9.870 1.987
East Village 1575 65.452 3.949 14.863 0.050 0.145 0.412 12.378 2.751
Marble Hill-Inwood 1508 15.103 9.068 1.891 0.011 0.137 0.383 72.428 0.980
Chinatown 1474 16.339 4.776 63.872 0.023 0.079 0.157 13.419 1.336
Battery Park City-Lower Manhattan 1311 65.405 3.244 20.192 0.043 0.088 0.385 7.695 2.947
Midtown-Midtown South 1196 64.097 4.576 20.754 0.028 0.119 0.321 8.117 1.987
Gramercy 1172 73.685 3.298 13.363 0.036 0.068 0.275 7.228 2.047
Manhattanville 913 7.451 25.786 2.161 0.022 0.336 0.331 62.802 1.111
Stuyvesant Town-Cooper Village 447 74.113 3.568 12.647 0.029 0.109 0.299 6.675 2.561

Comments:

  • This table lists the number of trees and percent racial and ethnic composition within each NTA in Manhattan.
  • The table is sorted by number of trees in descending order. The three neighborhoods with the most trees (Upper West Side, Upper East Side-Carnegie Hill, and West Village) all have a majority white population.

Since NTAs may differ in size, trees per acre was used to standardize street tree counts in each NTA by the acreage of the NTA. 6 NTAs from the 2015 Tree Census were missing acreage information, so these NTAs were removed for these analyses.

Only alive trees were included in the poverty level and education analyses.

Correlation coefficients were calculated to gauge if a linear association exists between our x and y variables. To test the significance of a correlation coefficient, x and y variables must have a bivariate normal distribution. Since most of our x and y variables exhibited a skewed distribution (histograms not shown), we were unable to test the significance of the correlation coefficient at this time.

Poverty

Is there an association between the percentage of people whose income is below the poverty line and the number of trees per acre 1) across neighborhoods in NYC and 2) across neighborhoods in each borough?

All of NYC

When examining all of the neighborhoods in NYC, there is a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.0220).

# Read cells A9:J203 of the NTA population/acreage workbook, supplying the ten
# column names explicitly.
acres_raw <-
  read_excel(
    "small_data/t_pl_p5_nta.xlsx",
    range = "A9:J203",
    col_names = c(
      "borough", "county_code", "nta", "nta_name",
      "total_pop_2000", "total_pop_2010", "pop_change_num",
      "pop_change_per", "total_acres", "persons_per_acre"
    )
  ) |>
  janitor::clean_names()

# Only the NTA name and its acreage are needed downstream.
acres_sub <- select(acres_raw, nta_name, total_acres)

# Count living trees in each NTA (keeping the borough alongside the NTA name).
trees_per_nta <-
  trees_2015 |>
  filter(status == "Alive") |>
  count(nta_name, borough, name = "num_trees")

# Attach acreage to each NTA by name; NTAs without a match get NA acreage.
trees_and_acres <-
  trees_per_nta |>
  left_join(acres_sub, by = "nta_name")

# Number of NTAs that have no acreage after the join.
num_missing_nta <- sum(is.na(pull(trees_and_acres, total_acres)))

# Drop NTAs lacking acreage, compute trees per acre, and sort from densest to
# sparsest.
trees_per_acre_df <-
  trees_and_acres |>
  drop_na(total_acres) |>
  mutate(trees_per_acre = num_trees / total_acres) |>
  arrange(desc(trees_per_acre))

# Load the neighborhood poverty export and standardize its column names.
poverty_raw <-
  janitor::clean_names(
    read_csv("large_tree_data/NYC EH Data Portal - Neighborhood poverty.csv")
  )

# Keep the 2013-17 rows reported at the NTA2010 geography, retaining only the
# neighborhood name and poverty percentage under the names used for joining.
poverty_clean <-
  poverty_raw |>
  filter(geo_type == "NTA2010", time == "2013-17") |>
  select(nta_name = geography, poverty_percent = percent)

# Add the poverty percentage to each NTA's trees-per-acre record.
trees_and_poverty <-
  trees_per_acre_df |>
  left_join(poverty_clean, by = "nta_name")

# Correlation between trees per acre and percent below the poverty line across
# all NTAs.
with(trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.02202705
# Interactive scatter plot with one marker colour per neighborhood and a hover
# label listing neighborhood, borough, poverty percentage, and trees per acre.
plot_ly(
  trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in NYC",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Poverty By Borough

The following tabs explore the association between percentage of people living below the poverty line and number of trees per acre.

Manhattan

When examining only neighborhoods in Manhattan, there is a weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.2111).

# Manhattan neighborhoods only.
manhattan_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Manhattan")

# Correlation between trees per acre and percent below the poverty line.
with(manhattan_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.2110746
# Interactive scatter plot of the Manhattan subset, one marker colour per
# neighborhood.
plot_ly(
  manhattan_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Manhattan",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Brooklyn

When examining only neighborhoods in Brooklyn, there remains a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.0027). This could indicate that a non-linear relationship may be more suitable; it is also possible that no association exists.

# Brooklyn neighborhoods only.
brooklyn_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Brooklyn")

# Correlation between trees per acre and percent below the poverty line.
with(brooklyn_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.002666612
# Interactive scatter plot of the Brooklyn subset, one marker colour per
# neighborhood.
plot_ly(
  brooklyn_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Brooklyn",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

The Bronx

When examining only neighborhoods in the Bronx, there is a moderate, positive, linear association between percentage of people living below the poverty line and number of trees per acre (r = 0.4558). It appears that as the percentage of people living below the poverty line increases, the number of trees per acre increases. These results are contrary to our expectations. This may indicate that other factors may be at play.

# Bronx neighborhoods only.
bronx_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Bronx")

# Correlation between trees per acre and percent below the poverty line.
with(bronx_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] 0.4557542
# Interactive scatter plot of the Bronx subset, one marker colour per
# neighborhood.
plot_ly(
  bronx_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Bronx",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Staten Island

When examining only neighborhoods in Staten Island, there is a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.4646). As the percentage of people living below the poverty line increases, the number of trees per acre decreases.

# Staten Island neighborhoods only.
staten_island_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Staten Island")

# Correlation between trees per acre and percent below the poverty line.
with(staten_island_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.4646225
# Interactive scatter plot of the Staten Island subset, one marker colour per
# neighborhood.
plot_ly(
  staten_island_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Staten Island",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Queens

When examining only neighborhoods in Queens, there is a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.4384). As the percentage of people living below the poverty line increases, the number of trees per acre decreases.

# Queens neighborhoods only.
queens_trees_and_poverty <-
  filter(trees_and_poverty, borough == "Queens")

# Correlation between trees per acre and percent below the poverty line.
with(queens_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.4383884
# Interactive scatter plot of the Queens subset, one marker colour per
# neighborhood.
plot_ly(
  queens_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Queens",
    xaxis = list(
      title = "Percentage of people whose income <br> is below the poverty line"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Summary

Summary: When examining all of the neighborhoods in NYC, there is a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre. When examining neighborhoods by borough, the association between percentage of people living below the poverty line and number of trees per acre varies. Notably, neighborhoods across Staten Island and Queens exhibit a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre; as the percentage of people living below the poverty line increases, the number of trees per acre decreases.

Education

Is there an association between the percentage of people who graduated high school and the number of trees per acre 1) across neighborhoods in NYC and 2) across neighborhoods in each borough?

All of NYC

When examining all of the neighborhoods in NYC, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.0940).

# Load the high-school graduation export and standardize its column names.
education_raw <-
  janitor::clean_names(
    read_csv("large_tree_data/NYC EH Data Portal - Graduated high school.csv")
  )

# Keep the 2013-17 rows reported at the NTA2010 geography, retaining only the
# neighborhood name and graduation percentage under the names used for joining.
education_clean <-
  education_raw |>
  filter(geo_type == "NTA2010", time == "2013-17") |>
  select(nta_name = geography, graduated_hs_percent = percent)

# Add the graduation percentage to each NTA's trees-per-acre record.
trees_and_education <-
  trees_per_acre_df |>
  left_join(education_clean, by = "nta_name")

# Correlation between trees per acre and percent graduated high school across
# all NTAs.
with(trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.09403581
# Interactive scatter plot with one marker colour per neighborhood and a hover
# label listing neighborhood, borough, graduation percentage, and trees per
# acre.
plot_ly(
  trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in NYC",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Education By Borough

The following tabs explore the association between the percentage of people who graduated high school and the number of trees per acre.

Manhattan

When examining only neighborhoods in Manhattan, there remains a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.2170).

# Manhattan neighborhoods only.
manhattan_trees_and_education <-
  filter(trees_and_education, borough == "Manhattan")

# Correlation between trees per acre and percent graduated high school.
with(manhattan_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.216967
# Interactive scatter plot of the Manhattan subset, one marker colour per
# neighborhood.
plot_ly(
  manhattan_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Manhattan",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Brooklyn

When examining only neighborhoods in Brooklyn, there remains a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.2240).

# Brooklyn neighborhoods only.
brooklyn_trees_and_education <-
  filter(trees_and_education, borough == "Brooklyn")

# Correlation between trees per acre and percent graduated high school.
with(brooklyn_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.223951
# Interactive scatter plot of the Brooklyn subset, one marker colour per
# neighborhood.
plot_ly(
  brooklyn_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Brooklyn",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

The Bronx

When examining only neighborhoods in the Bronx, there is a moderate, negative, linear association between the percentage of people who graduated high school and the number of trees per acre (r = -0.4767). It appears that as the percentage of people who graduated high school increases, the number of trees per acre decreases. These results are contrary to our expectations. This may indicate that other factors may be at play.

# Bronx neighborhoods only.
bronx_trees_and_education <-
  filter(trees_and_education, borough == "Bronx")

# Correlation between trees per acre and percent graduated high school.
with(bronx_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] -0.4766586
# Interactive scatter plot of the Bronx subset, one marker colour per
# neighborhood.
plot_ly(
  bronx_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Bronx",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Staten Island

When examining only neighborhoods in Staten Island, there is a moderate, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.5839). As the percentage of people who graduated high school increases, the number of trees per acre increases.

# Staten Island neighborhoods only.
staten_island_trees_and_education <-
  filter(trees_and_education, borough == "Staten Island")

# Correlation between trees per acre and percent graduated high school.
with(
  staten_island_trees_and_education,
  cor(trees_per_acre, graduated_hs_percent)
)
## [1] 0.5839166
# Interactive scatter plot of the Staten Island subset, one marker colour per
# neighborhood.
plot_ly(
  staten_island_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Staten Island",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Queens

When examining only neighborhoods in Queens, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.1510).

# Queens neighborhoods only.
queens_trees_and_education <-
  filter(trees_and_education, borough == "Queens")

# Correlation between trees per acre and percent graduated high school.
with(queens_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.1510256
# Interactive scatter plot of the Queens subset, one marker colour per
# neighborhood.
plot_ly(
  queens_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Queens",
    xaxis = list(
      title = "Percent graduated <br> high school (includes equivalency)"
    ),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )

Summary

Summary: When examining all of the neighborhoods in NYC, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.0940). When examining neighborhoods by borough, the association between the percentage of people who graduated high school and the number of trees per acre varies. Notably, neighborhoods across Staten Island exhibit a moderate, positive, linear association between percentage of people who graduated high school and the number of trees per acre; as the percentage of people who graduated high school increases, the number of trees per acre increases.