Due to global warming, one of the difficulties that the urban environment face is the increase of precipitation. There is a chance that this will lead to water pollution as the stormwater that runs through the urban impervious surface collects pollutants and enters the sewer system. There has been continuous effort to implement green infrastructure to adapt to this environmental change. However there are certain limits because this involves the voluntary participation of individual households. This study will help understand the motivation of the participants of the past rain barrel project and help guide environmental organizations to plan how to increase participation.
This project will analyze the motivation of households participating in the rain barrel project by comparing contributing factors which are the education attainment and median income at the census tract level. The education attainment and income of the people participating in this project will be represented as a map. Then the shapefile of the rain barrels will be overlapped in order to analyze the demographic characteristic of households participating in the project.
library(acs)
install.packages("base")
library(choroplethr)
library(ggplot2)
library(choroplethrMaps)
library(ggmap)
#install.packages("mapproj")
library(mapproj)
library(maps)
library(tidycensus)
library(tidyverse)
library(kableExtra)
#knitr::opts_chunk$set(cache=TRUE) # cache the results for quick compiling
#register your api key
#use personal account not ub gmail account
register_google("")
#census api key
census_api_key("de66b80e57fd740d87d1af03c5c306086b4f17fa")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
#2 educational attainment
#get_acs() will be used.
#search for variables
v1 <- load_variables(2019, "acs5", cache = TRUE)
#View(v1)
#variable:B15003_001: total:educational attainment for population 25 years and older
#variable: B15003_001, B15003_017 to B15003_025 will be used
#test
#educational attainment data only for total
ed<-get_acs(geography="tract",variables=c(educationtotal="B15003_001"),county="Erie",state="NY",year=2019)
## Getting data from the 2015-2019 5-year ACS
#view(ed)
#We need to identify the variables we are willing to use and name it again to make it understandable.
#B19019_001 will be later used for median income. This represents the total of median household income by household size.
get_vars = c('B19019_001',paste('B15003_',sprintf('%03d',c(1,17:25)),sep=''))
names(get_vars) = c('income','all','hs','ged','someco1','someco2',
'as','bac','mas','prof','doc')
edu_data = get_acs(geography = 'tract', variables = get_vars,
state='NY',county='Erie',geometry='TRUE')
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|============================ | 39%
|
|============================ | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 49%
|
|=================================== | 50%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================= | 55%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|============================================ | 63%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 72%
|
|==================================================== | 74%
|
|====================================================== | 76%
|
|====================================================== | 78%
|
|======================================================== | 80%
|
|========================================================== | 82%
|
|============================================================ | 85%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 99%
|
|======================================================================| 100%
#did I use the 5 year acs table? double check? yes
as_tibble(edu_data) %>% select(-geometry)
## # A tibble: 2,607 × 5
## GEOID NAME variable estimate moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 36029014604 Census Tract 146.04, Erie County, New Yo… all 5480 174
## 2 36029014604 Census Tract 146.04, Erie County, New Yo… hs 674 159
## 3 36029014604 Census Tract 146.04, Erie County, New Yo… ged 28 33
## 4 36029014604 Census Tract 146.04, Erie County, New Yo… someco1 308 139
## 5 36029014604 Census Tract 146.04, Erie County, New Yo… someco2 424 193
## 6 36029014604 Census Tract 146.04, Erie County, New Yo… as 412 156
## 7 36029014604 Census Tract 146.04, Erie County, New Yo… bac 1586 251
## 8 36029014604 Census Tract 146.04, Erie County, New Yo… mas 1134 220
## 9 36029014604 Census Tract 146.04, Erie County, New Yo… prof 497 172
## 10 36029014604 Census Tract 146.04, Erie County, New Yo… doc 268 114
## # … with 2,597 more rows
#classify education attainment to at least hight school and at least bachelor degree
library(tidyverse)
acs_spread = edu_data %>%
select(-moe) %>%
spread(key=variable,value=estimate) %>%
mutate('atleast_hs' = (hs + ged+ someco1 + someco2 + as + bac + mas + prof + doc) / all,
'atleast_bs' = (bac + mas + prof + doc) / all) %>%
select(GEOID,income,atleast_hs,atleast_bs)
#Continuing..
Add any additional processing steps here.
[~200 words]
Tables and figures (maps and other graphics) are carefully planned to convey the results of your analysis. Intense exploration and evidence of many trials and failures. The author looked at the data in many different ways before coming to the final presentation of the data.
Show tables, plots, etc. and describe them.
[~200 words]
Clear summary adequately describing the results and putting them in context. Discussion of further questions and ways to continue investigation.
All sources are cited in a consistent manner
#20211208 new #download the rain barrel 2015 shapefile
library(sf)
rb_2015<-st_read(“C:/R_Project/RB1516/2015_completed_RBinstall.shp”)
#Projected CRS: NAD83 / New York West (ftUS)
#pipe dataset to tibble
library(tidyverse)
rb_2015_t = st_read(“C:/R_Project/RB1516/2015_completed_RBinstall.shp”) %>% as_tibble()
head(rb_2015_t)
#tidy data #add count
#new packages library(tidycensus) library(tidyverse) library(sf);library(ggplot2);library(raster) ;library(sp);library(GISTools) library(dplyr) library(rgdal) library(magrittr) library(lubridate)#date library(leaflet) library(spdep);library(rgdal); library(viridis) library(mapview) library(RColorBrewer) library(rgeos)
#rb_2015_t%>%class() is tibble #rb_2015 is sf
rb_2015%>%class()
#census data #census_tract is sf
census_tract<-st_read(“C:/R_Project/2015_CensusTract”)
head(census_tract)
census_tract%>%class()
#count rb
rb_2015%>%class()
census_tract%>%class()
#rb_2015_sp <-sp #rb_2015_t <-tbl
rb_2015_df<-rb_2015_sp%>%as_tibble()
coordinates(rb_2015_df) <- ~ Longitude+ Latitude
head(rb_2015_t) view(rb_2015_t)
#https://gis.stackexchange.com/questions/110117/counting-number-of-points-in-polygon-using-r
library(“raster”)
library(“sp”)
class(rb_2015)
#convert sf to sp
census_tract <-as(st_geometry(census_tract), “Spatial”)
#proj4string(rb_2015_sp)
#Warning message: #In proj4string(rb_2015_sp) : #CRS object has comment, which is lost in output
#https://cran.r-project.org/web/packages/sp/vignettes/CRS_warnings.html
slot(rb_2015_df, “proj4string”)
slot(census_tract,“proj4string”)
slot(rb_2015_sp, “proj4string”) <- crs(census_tract)
tmp<-poly.counts(pts=rb_2015_t,polys = census_tract)
#setNames(tmp, NYC_nb@data$NAME_1)
tmp_df <- data.frame(id = names(tmp), count = tmp)
census_tract$count<- tmp
mapview::mapview(census_tract ,zcol = “count”)
head(tmp_df)
#census_tract only has GEOID No FIPs
#convert latitude longitude to census tract or GEOID or fips
#https://shiandy.com/post/2020/11/02/mapping-lat-long-to-fips/ #reference:https://gis.stackexchange.com/questions/21533/api-to-reverse-geocode-latitude-longitude-to-census-tract
n <- 1e5 set.seed(7477263) county_bbox <- st_bbox(census_tracts) # randomly sample longitude and latitude from bounding box latlong_dat <- data.frame(id = seq_len(n), x = rnorm(n), Longitude = runif(n, min = county_bbox[“xmin”], max = county_bbox[“xmax”]), Latitude = runif(n, min = county_bbox[“ymin”], max = county_bbox[“ymax”]))
latlong_sf <- latlong_dat %>% filter(!is.na(Latitude), !is.na(Longitude)) %>% st_as_sf(coords = c(“Longitude”, “Latitude”), crs = st_crs(census_tract))
system.time({ intersected <- st_intersects(latlong_sf, census_tract) })
latlong_final <- latlong_sf %>% mutate(intersection = as.integer(intersected), fips = if_else(is.na(intersection), ““, census_tract$GEOID[intersection]))
head(latlong_final)
census_tract %>% filter(startsWith(GEOID, “36029”)) %>% select(geometry) %>% plot() latlong_final %>% filter(fips == “36013036700”) %>% select(geometry) %>% slice_sample(n = 10) %>% plot(add = TRUE, reset = FALSE, pch = 16, col = “red”)
#count rain barrels per census tract
#n.breach = poly.counts(breach,blocks) # Compute densities and map them #choropleth(blocks,n.breach/poly.areas(blocks))
#proj4string(crime_df) <- crs(NYC_nb)
#tmp<-poly.counts(pts=crime_df,polys = NYC_nb)
#These two line are the main thing that counts the point
#income and education and census tract
#graph