# Load the raw 2015 street tree census. Empty strings, "NA", and "Unknown"
# are all read as missing values.
trees_2015 <-
  read_csv(
    "large_tree_data/2015_tree_raw.csv",
    na = c("", "NA", "Unknown")
  ) |>
  janitor::clean_names() |>
  mutate(
    # Title-case the common species names.
    spc_common = str_to_title(spc_common),
    # Move the health levels to the front in the order Good, Fair, Poor.
    health = fct_relevel(health, c("Good", "Fair", "Poor"))
  )
This plot shows the population breakdown of each New York City borough by the race and ethnicity categories included in the 2010 US Census. White is the largest group in every borough except the Bronx. Hawaiian and Pacific Islander is consistently the smallest group in all five boroughs.
# Read the 2010 population-by-race workbook. The first six rows are skipped
# and no header row is used, so readxl assigns positional names ("...1",
# "...2", ...), which are then mapped to descriptive names (new = old).
# Any row containing a missing value is dropped.
pop_race_2010 <-
  readxl::read_excel(
    "small_data/pop_race2010_nta.xlsx",
    skip = 6,
    col_names = FALSE,
    na = "NA"
  ) |>
  rename(all_of(c(
    borough = "...1",
    census_FIPS = "...2",
    nta = "...3",
    nta_name = "...4",
    total_population = "...5",
    white_nonhis = "...6",
    black_nonhis = "...7",
    am_ind_alaska_nonhis = "...8",
    asian_nonhis = "...9",
    hawaii_pac_isl_nonhis = "...10",
    other_nonhis = "...11",
    two_races = "...12",
    hispanic_any_race = "...13"
  ))) |>
  drop_na()
# Total each race/ethnicity count per borough, reshape to one row per
# borough-race pair (count in `n`), and reorder the borough factor by `n`
# for plotting.
dem_race <-
  pop_race_2010 |>
  group_by(borough) |>
  summarise(
    white = sum(white_nonhis),
    black = sum(black_nonhis),
    asian = sum(asian_nonhis),
    am_ind_alaska = sum(am_ind_alaska_nonhis),
    hawaii_pac_island = sum(hawaii_pac_isl_nonhis),
    other = sum(other_nonhis),
    hispanic_all_races = sum(hispanic_any_race),
    two_races = sum(two_races)
  ) |>
  pivot_longer(cols = white:two_races, names_to = "race", values_to = "n") |>
  mutate(borough = fct_reorder(borough, n))
# Dodged bar chart of population by borough, one bar per race category.
# geom_col() draws bar heights directly from `n`.
dem_race |>
  ggplot(aes(x = borough, y = n, fill = race)) +
  geom_col(position = "dodge") +
  labs(
    title = "Population of Borough by Race",
    x = "Borough",
    y = "Total Population"
  )
# Drop `borough` and `nta_name` from the population table before joining;
# the joined data is later grouped by these columns, which therefore come
# from trees_2015.
pop_race_2010s <- select(pop_race_2010, -c(borough, nta_name))
# Attach NTA-level population counts to every tree record via the NTA code.
all_data <- trees_2015 |>
  left_join(pop_race_2010s, by = "nta")
# Per-borough tree count plus the percentage of each race/ethnicity category
# (the `n_*` columns other than `n_trees` are percentages, not counts),
# sorted from most to fewest trees.
# NOTE(review): all_data holds one row per tree, so each NTA's population
# figures are summed once per tree in that NTA. The borough percentages are
# therefore weighted by tree count rather than being plain population
# shares — confirm this is intended.
borough_data <-
  all_data |>
  group_by(borough) |>
  summarise(
    n_trees = as.numeric(n_distinct(tree_id)),
    n_white = sum(white_nonhis) / sum(total_population) * 100,
    n_black = sum(black_nonhis) / sum(total_population) * 100,
    n_asian = sum(asian_nonhis) / sum(total_population) * 100,
    n_pacific_islander = sum(hawaii_pac_isl_nonhis) / sum(total_population) * 100,
    n_amer_ind = sum(am_ind_alaska_nonhis) / sum(total_population) * 100,
    n_other = sum(other_nonhis) / sum(total_population) * 100,
    n_hispanic = sum(hispanic_any_race) / sum(total_population) * 100,
    n_two_races = sum(two_races) / sum(total_population) * 100
  ) |>
  arrange(desc(n_trees))
# Render the summary as a table, rounded to three decimal places.
knitr::kable(
  borough_data,
  digits = 3,
  caption = "Trees and Population Demographics by Borough"
)
borough | n_trees | n_white | n_black | n_asian | n_pacific_islander | n_amer_ind | n_other | n_hispanic | n_two_races |
---|---|---|---|---|---|---|---|---|---|
Queens | 250551 | 29.840 | 17.494 | 22.617 | 0.051 | 0.303 | 1.601 | 25.424 | 2.670 |
Brooklyn | 177293 | 34.912 | 33.732 | 9.614 | 0.025 | 0.187 | 0.439 | 19.488 | 1.603 |
Staten Island | 105318 | 71.685 | 6.017 | 6.866 | 0.030 | 0.122 | 0.190 | 13.858 | 1.233 |
Bronx | 85203 | 12.830 | 30.513 | 3.339 | 0.029 | 0.256 | 0.643 | 51.208 | 1.184 |
Manhattan | 65423 | 50.743 | 13.488 | 9.241 | 0.032 | 0.132 | 0.327 | 24.153 | 1.885 |
Comments:
# Number of distinct trees recorded in Manhattan.
number_mhtn <- summarise(
  filter(all_data, borough == "Manhattan"),
  n_trees = n_distinct(tree_id)
)
# Per-NTA tree count and race/ethnicity percentages for Manhattan only
# (the `n_*` columns other than `n_trees` are percentages), sorted from
# most to fewest trees.
# NOTE(review): rows are one per tree; the ratios are unaffected by the
# repetition only if every row of an NTA carries the same population
# figures — presumably true since they come from a join on `nta`; verify.
percentages <-
  all_data |>
  filter(borough == "Manhattan") |>
  group_by(nta_name) |>
  summarise(
    n_trees = as.numeric(n_distinct(tree_id)),
    n_white = sum(white_nonhis) / sum(total_population) * 100,
    n_black = sum(black_nonhis) / sum(total_population) * 100,
    n_asian = sum(asian_nonhis) / sum(total_population) * 100,
    n_pacific_islander = sum(hawaii_pac_isl_nonhis) / sum(total_population) * 100,
    n_amer_ind = sum(am_ind_alaska_nonhis) / sum(total_population) * 100,
    n_other = sum(other_nonhis) / sum(total_population) * 100,
    n_hispanic = sum(hispanic_any_race) / sum(total_population) * 100,
    n_two_races = sum(two_races) / sum(total_population) * 100
  ) |>
  arrange(desc(n_trees))
# Render the summary as a table, rounded to three decimal places.
knitr::kable(
  percentages,
  digits = 3,
  caption = "Manhattan Trees and Population Demographics by NTA"
)
nta_name | n_trees | n_white | n_black | n_asian | n_pacific_islander | n_amer_ind | n_other | n_hispanic | n_two_races |
---|---|---|---|---|---|---|---|---|---|
Upper West Side | 5881 | 67.709 | 8.420 | 5.959 | 0.026 | 0.103 | 0.336 | 15.460 | 1.988 |
Upper East Side-Carnegie Hill | 4673 | 87.648 | 1.093 | 4.986 | 0.020 | 0.029 | 0.214 | 4.630 | 1.379 |
West Village | 3837 | 80.891 | 2.023 | 8.153 | 0.030 | 0.075 | 0.353 | 6.062 | 2.413 |
Central Harlem North-Polo Grounds | 3515 | 5.770 | 67.073 | 1.731 | 0.027 | 0.340 | 0.259 | 22.831 | 1.969 |
Washington Heights South | 2990 | 11.252 | 10.454 | 2.727 | 0.006 | 0.144 | 0.360 | 74.056 | 1.001 |
Hudson Yards-Chelsea-Flatiron-Union Square | 2988 | 65.091 | 5.726 | 11.785 | 0.030 | 0.133 | 0.372 | 14.602 | 2.262 |
Morningside Heights | 2751 | 46.041 | 13.623 | 13.342 | 0.054 | 0.188 | 0.363 | 23.521 | 2.870 |
Washington Heights North | 2684 | 25.776 | 4.078 | 2.534 | 0.015 | 0.086 | 0.317 | 66.149 | 1.044 |
Central Harlem South | 2670 | 16.085 | 55.877 | 3.541 | 0.060 | 0.256 | 0.408 | 21.080 | 2.695 |
East Harlem North | 2591 | 6.784 | 35.549 | 3.044 | 0.016 | 0.221 | 0.319 | 52.743 | 1.325 |
Hamilton Heights | 2407 | 10.897 | 32.246 | 2.199 | 0.031 | 0.245 | 0.367 | 52.193 | 1.822 |
Lenox Hill-Roosevelt Island | 2327 | 75.313 | 4.421 | 10.609 | 0.076 | 0.071 | 0.324 | 7.258 | 1.929 |
Yorkville | 2269 | 75.996 | 3.667 | 9.271 | 0.032 | 0.065 | 0.277 | 8.810 | 1.881 |
Turtle Bay-East Midtown | 2253 | 77.053 | 2.091 | 12.990 | 0.041 | 0.045 | 0.359 | 5.772 | 1.649 |
SoHo-TriBeCa-Civic Center-Little Italy | 2215 | 66.094 | 2.185 | 22.175 | 0.026 | 0.070 | 0.400 | 6.481 | 2.569 |
Lincoln Square | 2065 | 73.351 | 4.407 | 11.248 | 0.023 | 0.094 | 0.285 | 8.647 | 1.945 |
East Harlem South | 2014 | 17.395 | 24.571 | 8.293 | 0.095 | 0.166 | 0.376 | 47.492 | 1.611 |
Clinton | 2008 | 56.427 | 6.253 | 15.007 | 0.068 | 0.153 | 0.394 | 19.347 | 2.352 |
Lower East Side | 1943 | 22.552 | 10.871 | 24.900 | 0.018 | 0.195 | 0.262 | 39.571 | 1.632 |
Murray Hill-Kips Bay | 1746 | 66.647 | 4.775 | 16.225 | 0.032 | 0.108 | 0.357 | 9.870 | 1.987 |
East Village | 1575 | 65.452 | 3.949 | 14.863 | 0.050 | 0.145 | 0.412 | 12.378 | 2.751 |
Marble Hill-Inwood | 1508 | 15.103 | 9.068 | 1.891 | 0.011 | 0.137 | 0.383 | 72.428 | 0.980 |
Chinatown | 1474 | 16.339 | 4.776 | 63.872 | 0.023 | 0.079 | 0.157 | 13.419 | 1.336 |
Battery Park City-Lower Manhattan | 1311 | 65.405 | 3.244 | 20.192 | 0.043 | 0.088 | 0.385 | 7.695 | 2.947 |
Midtown-Midtown South | 1196 | 64.097 | 4.576 | 20.754 | 0.028 | 0.119 | 0.321 | 8.117 | 1.987 |
Gramercy | 1172 | 73.685 | 3.298 | 13.363 | 0.036 | 0.068 | 0.275 | 7.228 | 2.047 |
Manhattanville | 913 | 7.451 | 25.786 | 2.161 | 0.022 | 0.336 | 0.331 | 62.802 | 1.111 |
Stuyvesant Town-Cooper Village | 447 | 74.113 | 3.568 | 12.647 | 0.029 | 0.109 | 0.299 | 6.675 | 2.561 |
Comments:
Since NTAs may differ in size, trees per acre was used to standardize street tree counts in each NTA by the acreage of the NTA. Six NTAs from the 2015 Tree Census were missing acreage information, so these NTAs were removed from these analyses.
Only alive trees were included in the poverty level and education analyses.
Correlation coefficients were calculated to gauge if a linear association exists between our x and y variables. To test the significance of a correlation coefficient, x and y variables must have a bivariate normal distribution. Since most of our x and y variables exhibited a skewed distribution (histograms not shown), we were unable to test the significance of the correlation coefficient at this time.
Is there an association between the percentage of people whose income is below the poverty line and the number of trees per acre 1) across neighborhoods in NYC and 2) across neighborhoods in each borough?
When examining all of the neighborhoods in NYC, there is a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.0220).
# Read NTA-level population and acreage figures from cells A9:J203 of the
# workbook, supplying the ten column names explicitly.
# read_excel() is namespace-qualified, matching the file's other workbook
# read, so this chunk does not depend on readxl being attached.
acres_raw <-
  readxl::read_excel(
    "small_data/t_pl_p5_nta.xlsx",
    range = "A9:J203",
    col_names = c(
      "borough", "county_code", "nta", "nta_name",
      "total_pop_2000", "total_pop_2010", "pop_change_num",
      "pop_change_per", "total_acres", "persons_per_acre"
    )
  ) |>
  janitor::clean_names()
# Keep only the NTA name and its total acreage.
acres_sub <- select(acres_raw, nta_name, total_acres)
# Count living trees per NTA (keeping borough alongside), storing the count
# in `num_trees`.
trees_per_nta <- trees_2015 |>
  filter(status == "Alive") |>
  count(nta_name, borough, name = "num_trees")
# Attach acreage to the per-NTA tree counts, matching on NTA name.
trees_and_acres <- trees_per_nta |>
  left_join(acres_sub, by = "nta_name")
# Number of NTAs that found no acreage in the join.
num_missing_nta <- trees_and_acres |>
  pull(total_acres) |>
  is.na() |>
  sum()
# Drop NTAs without acreage, compute tree density, and sort densest first.
trees_per_acre_df <- trees_and_acres |>
  drop_na(total_acres) |>
  mutate(trees_per_acre = num_trees / total_acres) |>
  arrange(desc(trees_per_acre))
# Load the neighborhood poverty export and standardize its column names.
poverty_raw <- janitor::clean_names(
  read_csv("large_tree_data/NYC EH Data Portal - Neighborhood poverty.csv")
)
# Keep the NTA2010 rows for the 2013-17 period, retaining only the
# neighborhood name and poverty percentage under analysis-friendly names.
poverty_clean <- poverty_raw |>
  filter(geo_type == "NTA2010", time == "2013-17") |>
  select(nta_name = geography, poverty_percent = percent)
# Attach the poverty percentage to each NTA's tree density.
trees_and_poverty <- trees_per_acre_df |>
  left_join(poverty_clean, by = "nta_name")
# Correlation (cor() with default settings) between tree density and
# poverty percentage across all NTAs.
with(trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.02202705
# Interactive scatter plot: one marker per neighborhood, colored by
# neighborhood, with the underlying values in the hover text.
plot_ly(
  trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in NYC",
    xaxis = list(title = "Percentage of people whose income <br> is below the poverty line"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
The following tabs explore the association between percentage of people living below the poverty line and number of trees per acre.
When examining only neighborhoods in Manhattan, there is a weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.2111).
# Restrict the tree-density/poverty table to Manhattan neighborhoods.
manhattan_trees_and_poverty <- filter(trees_and_poverty, borough == "Manhattan")
# Correlation between tree density and poverty percentage within Manhattan.
with(manhattan_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.2110746
# Interactive scatter plot of the Manhattan subset, one marker per
# neighborhood.
plot_ly(
  manhattan_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Manhattan",
    xaxis = list(title = "Percentage of people whose income <br> is below the poverty line"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in Brooklyn, there remains a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.0027). This could indicate that a non-linear relationship may be more suitable; it is also possible that no association exists.
# Restrict the tree-density/poverty table to Brooklyn neighborhoods.
brooklyn_trees_and_poverty <- filter(trees_and_poverty, borough == "Brooklyn")
# Correlation between tree density and poverty percentage within Brooklyn.
with(brooklyn_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.002666612
# Interactive scatter plot of the Brooklyn subset, one marker per
# neighborhood.
plot_ly(
  brooklyn_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Brooklyn",
    xaxis = list(title = "Percentage of people whose income <br> is below the poverty line"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in the Bronx, there is a moderate, positive, linear association between percentage of people living below the poverty line and number of trees per acre (r = 0.4558). It appears that as the percentage of people living below the poverty line increases, the number of trees per acre increases. These results are contrary to our expectations. This may indicate that other factors may be at play.
# Restrict the tree-density/poverty table to Bronx neighborhoods.
bronx_trees_and_poverty <- filter(trees_and_poverty, borough == "Bronx")
# Correlation between tree density and poverty percentage within the Bronx.
with(bronx_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] 0.4557542
# Interactive scatter plot of the Bronx subset, one marker per neighborhood.
plot_ly(
  bronx_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Bronx",
    xaxis = list(title = "Percentage of people whose income <br> is below the poverty line"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in Staten Island, there is a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.4646). As the percentage of people living below the poverty line increases, the number of trees per acre decreases.
# Restrict the tree-density/poverty table to Staten Island neighborhoods.
staten_island_trees_and_poverty <- filter(trees_and_poverty, borough == "Staten Island")
# Correlation between tree density and poverty percentage within
# Staten Island.
with(staten_island_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.4646225
# Interactive scatter plot of the Staten Island subset, one marker per
# neighborhood.
plot_ly(
  staten_island_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Staten Island",
    xaxis = list(title = "Percentage of people whose income <br> is below the poverty line"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in Queens, there is a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre (r = -0.4384). As the percentage of people living below the poverty line increases, the number of trees per acre decreases.
# Restrict the tree-density/poverty table to Queens neighborhoods.
queens_trees_and_poverty <- filter(trees_and_poverty, borough == "Queens")
# Correlation between tree density and poverty percentage within Queens.
with(queens_trees_and_poverty, cor(trees_per_acre, poverty_percent))
## [1] -0.4383884
# Interactive scatter plot of the Queens subset, one marker per neighborhood.
plot_ly(
  queens_trees_and_poverty,
  x = ~poverty_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>%below poverty line: ", poverty_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent below the poverty line and trees per acre in Queens",
    xaxis = list(title = "Percentage of people whose income <br> is below the poverty line"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
Summary: When examining all of the neighborhoods in NYC, there is a very weak, negative, linear association between percentage of people living below the poverty line and number of trees per acre. When examining neighborhoods by borough, the association between percentage of people living below the poverty line and number of trees per acre varies. Notably, neighborhoods across Staten Island and Queens exhibit a moderate, negative, linear association between percentage of people living below the poverty line and number of trees per acre; as the percentage of people living below the poverty line increases, the number of trees per acre decreases.
Is there an association between the percentage of people who graduated high school and the number of trees per acre 1) across neighborhoods in NYC and 2) across neighborhoods in each borough?
When examining all of the neighborhoods in NYC, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.0940).
# Load the high-school graduation export and standardize its column names.
education_raw <- janitor::clean_names(
  read_csv("large_tree_data/NYC EH Data Portal - Graduated high school.csv")
)
# Keep the NTA2010 rows for the 2013-17 period, retaining only the
# neighborhood name and graduation percentage under analysis-friendly names.
education_clean <- education_raw |>
  filter(geo_type == "NTA2010", time == "2013-17") |>
  select(nta_name = geography, graduated_hs_percent = percent)
# Attach the high-school graduation percentage to each NTA's tree density.
trees_and_education <- trees_per_acre_df |>
  left_join(education_clean, by = "nta_name")
# Correlation (cor() with default settings) between tree density and
# graduation percentage across all NTAs.
with(trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.09403581
# Interactive scatter plot: one marker per neighborhood, colored by
# neighborhood, with the underlying values in the hover text.
plot_ly(
  trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in NYC",
    xaxis = list(title = "Percent graduated <br> high school (includes equivalency)"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
The following tabs explore the association between the percentage of people who graduated high school and the number of trees per acre.
When examining only neighborhoods in Manhattan, there remains a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.2170).
# Restrict the tree-density/education table to Manhattan neighborhoods.
manhattan_trees_and_education <- filter(trees_and_education, borough == "Manhattan")
# Correlation between tree density and graduation percentage within
# Manhattan.
with(manhattan_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.216967
# Interactive scatter plot of the Manhattan subset, one marker per
# neighborhood.
plot_ly(
  manhattan_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Manhattan",
    xaxis = list(title = "Percent graduated <br> high school (includes equivalency)"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in Brooklyn, there remains a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.2240).
# Restrict the tree-density/education table to Brooklyn neighborhoods.
brooklyn_trees_and_education <- filter(trees_and_education, borough == "Brooklyn")
# Correlation between tree density and graduation percentage within Brooklyn.
with(brooklyn_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.223951
# Interactive scatter plot of the Brooklyn subset, one marker per
# neighborhood.
plot_ly(
  brooklyn_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Brooklyn",
    xaxis = list(title = "Percent graduated <br> high school (includes equivalency)"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in the Bronx, there is a moderate, negative, linear association between the percentage of people who graduated high school and the number of trees per acre (r = -0.4767). It appears that as the percentage of people who graduated high school increases, the number of trees per acre decreases. These results are contrary to our expectations. This may indicate that other factors may be at play.
# Restrict the tree-density/education table to Bronx neighborhoods.
bronx_trees_and_education <- filter(trees_and_education, borough == "Bronx")
# Correlation between tree density and graduation percentage within the
# Bronx.
with(bronx_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] -0.4766586
# Interactive scatter plot of the Bronx subset, one marker per neighborhood.
plot_ly(
  bronx_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Bronx",
    xaxis = list(title = "Percent graduated <br> high school (includes equivalency)"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in Staten Island, there is a moderate, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.5839). As the percentage of people who graduated high school increases, the number of trees per acre increases.
# Restrict the tree-density/education table to Staten Island neighborhoods.
staten_island_trees_and_education <- filter(trees_and_education, borough == "Staten Island")
# Correlation between tree density and graduation percentage within
# Staten Island.
with(staten_island_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.5839166
# Interactive scatter plot of the Staten Island subset, one marker per
# neighborhood.
plot_ly(
  staten_island_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Staten Island",
    xaxis = list(title = "Percent graduated <br> high school (includes equivalency)"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
When examining only neighborhoods in Queens, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.1510).
# Restrict the tree-density/education table to Queens neighborhoods.
queens_trees_and_education <- filter(trees_and_education, borough == "Queens")
# Correlation between tree density and graduation percentage within Queens.
with(queens_trees_and_education, cor(trees_per_acre, graduated_hs_percent))
## [1] 0.1510256
# Interactive scatter plot of the Queens subset, one marker per neighborhood.
plot_ly(
  queens_trees_and_education,
  x = ~graduated_hs_percent,
  y = ~trees_per_acre,
  color = ~nta_name,
  colors = "viridis",
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "neighborhood: ", nta_name,
    "<br>borough: ", borough,
    "<br>% graduated HS: ", graduated_hs_percent,
    "<br>trees per acre: ", trees_per_acre
  )
) |>
  layout(
    title = "Percent graduated high school and trees per acre in Queens",
    xaxis = list(title = "Percent graduated <br> high school (includes equivalency)"),
    yaxis = list(title = "Number of trees per acre"),
    legend = list(title = list(text = "Neighborhood"))
  )
Summary: When examining all of the neighborhoods in NYC, there is a weak, positive, linear association between the percentage of people who graduated high school and the number of trees per acre (r = 0.0940). When examining neighborhoods by borough, the association between the percentage of people who graduated high school and the number of trees per acre varies. Notably, neighborhoods across Staten Island exhibit a moderate, positive, linear association between percentage of people who graduated high school and the number of trees per acre; as the percentage of people who graduated high school increases, the number of trees per acre increases.