Read in data from 2009 to 2020 in long format.
dat_phenophase
## # A tibble: 4,116,485 × 16
## site canopy heat heat_name water water_name block plot species barcode
## <chr> <chr> <fct> <fct> <fct> <fct> <chr> <chr> <chr> <dbl>
## 1 cfc closed _ ambient _ ambient a a3 abiba 3148
## 2 cfc closed _ ambient _ ambient a a3 abiba 3148
## 3 cfc closed _ ambient _ ambient a a3 abiba 3148
## 4 cfc closed _ ambient _ ambient a a3 abiba 3148
## 5 cfc closed _ ambient _ ambient a a3 abiba 3148
## 6 cfc closed _ ambient _ ambient a a3 abiba 3148
## 7 cfc closed _ ambient _ ambient a a3 abiba 3148
## 8 cfc closed _ ambient _ ambient a a3 abiba 3148
## 9 cfc closed _ ambient _ ambient a a3 abiba 3148
## 10 cfc closed _ ambient _ ambient a a3 abiba 3148
## # ℹ 4,116,475 more rows
## # ℹ 6 more variables: cohort <dbl>, year <dbl>, doy <dbl>, phenophase <fct>,
## # status <dbl>, notes <chr>
Question: What do I do about those with non-NA notes?
set.seed(1)
dat_phenophase %>%
filter(!is.na(notes)) %>%
group_by(notes) %>%
select(plot, species, barcode, year, doy, phenophase, status, notes) %>%
sample_n(1)
## # A tibble: 2,565 × 8
## # Groups: notes [2,565]
## plot species barcode year doy phenophase status notes
## <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr>
## 1 l7 rhaca 40843 2019 108 budbreak 0 "\""
## 2 k1 pinst 6844 2011 252 senescence 1 "\"\"dupliate deleted ba…
## 3 l2 poptr 40316 2020 310 budbreak 0 "\"not so much scenesced…
## 4 f3 acesa 2209 2011 137 oneleaf 0 "\"row added based on lh…
## 5 f6 fraal 38220 2018 121 leafdrop 0 "\"so close\""
## 6 j8 rhaca 39201 2019 108 mostleaf 0 "(formerly k13)"
## 7 h5 picgl 34593 2017 306 oneleaf 0 "(missing measurement? -…
## 8 k6 acesa 39967 2019 108 mostleaf 0 "(now j2)"
## 9 k7 betpa 40087 2019 108 mostleaf 0 "(now some l11)"
## 10 a3 picma 31319 2018 121 budbreak 0 "(picma?)"
## # ℹ 2,555 more rows
Multiple entries conflicting
dat_phenophase_full <- tidy_phenophase(distinct = F)
dat_phenophase_dup_group <- anti_join(dat_phenophase_full, dat_phenophase) %>%
distinct(site, canopy, heat, heat_name, water, water_name, block, plot, species, barcode, cohort, year, doy, phenophase, notes)
dat_phenophase_dup <- dat_phenophase_full %>%
right_join(dat_phenophase_dup_group)
dat_phenophase_dup %>%
arrange(site, canopy, heat, heat_name, water, water_name, block, plot, species, barcode, cohort, year, doy, phenophase, notes) %>%
select(plot, species, barcode, year, doy, phenophase, status, notes)
## # A tibble: 3,799 × 8
## plot species barcode year doy phenophase status notes
## <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr>
## 1 b8 picgl 32534 2018 114 budbreak 0 <NA>
## 2 b8 picgl 32534 2018 114 oneleaf 0 <NA>
## 3 b8 picgl 32534 2018 114 mostleaf 0 <NA>
## 4 b8 picgl 32534 2018 114 senescence 0 <NA>
## 5 b8 picgl 32534 2018 114 leafdrop 0 <NA>
## 6 b8 picgl 32534 2018 121 budbreak 0 <NA>
## 7 b8 picgl 32534 2018 121 oneleaf 0 <NA>
## 8 b8 picgl 32534 2018 121 mostleaf 0 <NA>
## 9 b8 picgl 32534 2018 121 senescence 0 <NA>
## 10 b8 picgl 32534 2018 121 leafdrop 0 <NA>
## # ℹ 3,789 more rows
Time of budbreak
dat_phenophase_time <- calc_phenophase_time(dat_phenophase)
p_phenophase_change <- plot_phenophase_change(dat_phenophase_time)
p_phenophase_change$budbreak
- 2012 seems to have more late outliers.
- If they were second flush, why were first flush never observed?
Example of outlier.
set.seed(1)
dat_phenophase_outlier <- dat_phenophase_time %>%
filter(phenophase == "budbreak", doy > 180) %>%
sample_n(1)
dat_phenophase_outlier %>% select(plot, species, barcode, year, phenophase, doy, notes)
## # A tibble: 1 × 7
## plot species barcode year phenophase doy notes
## <chr> <chr> <dbl> <dbl> <fct> <dbl> <chr>
## 1 k5 picgl 20035 2013 budbreak 193 <NA>
dat_phenophase %>%
right_join(dat_phenophase_outlier %>% select(-doy)) %>%
filter(abs(doy - dat_phenophase_outlier$doy) <= 30) %>%
select(plot, species, barcode, year, doy, phenophase, status, notes)
## # A tibble: 17 × 8
## plot species barcode year doy phenophase status notes
## <chr> <chr> <dbl> <dbl> <dbl> <fct> <dbl> <chr>
## 1 k5 picgl 20035 2013 165 budbreak 0 <NA>
## 2 k5 picgl 20035 2013 168 budbreak 0 <NA>
## 3 k5 picgl 20035 2013 171 budbreak 0 <NA>
## 4 k5 picgl 20035 2013 175 budbreak 0 <NA>
## 5 k5 picgl 20035 2013 179 budbreak 0 <NA>
## 6 k5 picgl 20035 2013 182 budbreak 0 <NA>
## 7 k5 picgl 20035 2013 185 budbreak 0 <NA>
## 8 k5 picgl 20035 2013 189 budbreak 0 <NA>
## 9 k5 picgl 20035 2013 193 budbreak 1 <NA>
## 10 k5 picgl 20035 2013 197 budbreak 1 <NA>
## 11 k5 picgl 20035 2013 200 budbreak 1 <NA>
## 12 k5 picgl 20035 2013 204 budbreak 1 <NA>
## 13 k5 picgl 20035 2013 207 budbreak 1 <NA>
## 14 k5 picgl 20035 2013 211 budbreak 1 <NA>
## 15 k5 picgl 20035 2013 214 budbreak 0 <NA>
## 16 k5 picgl 20035 2013 218 budbreak 0 <NA>
## 17 k5 picgl 20035 2013 221 budbreak 1 <NA>
Decided to remove outliers manually using time window.
dat_phenophase_time_clean <- calc_phenophase_time(dat_phenophase, remove_outlier = T)
dat_phenophase_clean <- tidy_phenophase(remove_outlier = T, dat_phenophase_time = dat_phenophase_time_clean)
setwd("phenologyb4warmed/")
usethis::use_data(dat_phenophase_time_clean, overwrite = T)
usethis::use_data(dat_phenophase_clean, overwrite = T)
setwd("..")
p_phenophase_change <- plot_phenophase_change(dat_phenophase_time_clean)
p_phenophase_change$budbreak
Better now.
Phenophases to ordinal data
dat_phenophase_ordinal <- tidy_phenophase_ordinal(dat_phenophase, season = "spring", keepfirst = F)
dat_phenophase_ordinal %>%
select(plot, species, barcode, year, doy, budbreak, oneleaf, mostleaf, stage) %>%
distinct(budbreak, oneleaf, mostleaf, stage, .keep_all = T)
## # A tibble: 8 × 9
## plot species barcode year doy budbreak oneleaf mostleaf stage
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 a3 abiba 3148 2010 82 0 0 0 <NA>
## 2 a3 abiba 3148 2010 137 1 0 0 budbreak
## 3 a3 abiba 3148 2010 154 1 1 0 oneleaf
## 4 a3 abiba 3148 2010 168 1 1 1 mostleaf
## 5 a3 abiba 3148 2011 189 0 1 0 oneleaf
## 6 a3 abiba 3148 2011 208 0 0 1 mostleaf
## 7 a5 picgl 3440 2011 193 1 0 1 mostleaf
## 8 a5 queru 3491 2011 193 0 1 1 mostleaf
- Does all No mean dormancy? (No.)
- If not, how to get dormancy? (Can assume that any time after we stopped surveys in the fall and before we started surveys in the spring is the dormancy time.)
- Yes for multiple categories? (Because some data are filled in, not observed. Use time of first yes only.)
Climate data
dat_climate_daily <- read_climate(option = "daily")
plot_climate(dat_climate_daily, plotoi = "a1")
- Maximum hourly rainfall seems to be 2160 inches. Was there something wrong? (Now fixed.)
- Were the rainfall at reduced rainfall plots measured? (No. Now rainfall removal flag implemented.)
- Difference in aboveground temperature with ambient plot seems big in 2014 and 2019? (Not sure yet.)