Determine Season from Date Using Lubridate in R

Determine season from Date using lubridate in R

I packaged @Lars Arne Jordanger's much more elegant approach into a function:

# Classify dates into a two-season scheme.
#
# input.date: a date or date-time vector accepted by month() and day().
# Returns a factor with the levels "Winter" and "Summer": dates up to and
# including April 15, and dates after October 15, are "Winter"; everything
# in between is "Summer".
getTwoSeasons <- function(input.date){
  # Encode every date as the number MMDD (April 15 becomes 415)
  mmdd <- day(input.date) + month(input.date) * 100
  # Upper limits of the three bins in MMDD form; cut() builds (lower, upper] intervals
  upper.limits <- c(0, 415, 1015, 1231)
  bin <- base::cut(mmdd, breaks = upper.limits)
  # First and last bin are both "Winter"; assigning a repeated level name merges them
  levels(bin) <- c("Winter", "Summer", "Winter")
  bin
}

Testing it on some sample data seems to work fine:

# 366 timestamps in 86400-second (one day) steps, starting 2016-01-01 12:00
getTwoSeasons(as.POSIXct("2016-01-01 12:00:00")+(0:365)*(60*60*24))

Find which season a particular date belongs to

How about using something like this:

# Map dates of any year to one of four season labels.
#
# DATES: vector of dates; only the month and day are used.
# Returns "Winter", "Spring", "Summer" or "Fall" for each date, with the
# season boundaries fixed at the 15th of March, June, September and December.
getSeason <- function(DATES) {
  # Season start dates, all expressed in the reference year 2012
  starts <- as.Date(c("2012-3-15", "2012-6-15", "2012-9-15", "2012-12-15"),
                    format = "%Y-%m-%d")
  spring.start <- starts[1]
  summer.start <- starts[2]
  fall.start <- starts[3]
  winter.start <- starts[4]

  # Project every input date onto 2012 so that only month and day matter
  d <- as.Date(strftime(DATES, format = "2012-%m-%d"))

  # Winter wraps around the new year; the remaining seasons are tested in
  # calendar order, so each inner test only needs its upper bound
  ifelse(d >= winter.start | d < spring.start, "Winter",
    ifelse(d < summer.start, "Spring",
      ifelse(d < fall.start, "Summer", "Fall")))
}

# 61 consecutive days starting 2011-12-01, spanning the Fall -> Winter cutoff
my.dates <- as.Date("2011-12-01", format = "%Y-%m-%d") + 0:60
# Seasons for the first 24 days (Dec 1 - Dec 24)
head(getSeason(my.dates), 24)
# [1] "Fall" "Fall" "Fall" "Fall" "Fall" "Fall" "Fall"
# [8] "Fall" "Fall" "Fall" "Fall" "Fall" "Fall" "Fall"
# [15] "Winter" "Winter" "Winter" "Winter" "Winter" "Winter"
# NOTE: the listing above is truncated; it shows 20 of the 24 requested
# values, and the remaining four are "Winter" as well.

One note: 2012 is a good year to which to convert all of the dates; since it is a leap year, any February 29ths in your data set will be handled smoothly.

Changing months to seasons

Here is an option where we create a named vector and use it to look up each 'Month' value, creating a new 'Season' column:

library(dplyr)
# Lookup table from month name to season. Seasons are assigned in calendar
# order, three months each: January-March "Winter", April-June "Spring",
# July-September "Summer", October-December "Fall".
nm1 <- setNames(
  rep(c("Winter", "Spring", "Summer", "Fall"), each = 3),
  month.name
)
# as.character() makes the lookup go by month name even if Month is a factor
# (indexing with a factor would use its integer codes instead); unname()
# keeps the month names from being carried along on the new column.
df1 %>%
  mutate(Season = unname(nm1[as.character(Month)]))

-output

#         Date     Month Temperature Season
#1 2016-07-01 July 13 Summer
#2 2017-01-08 January 5 Winter
#3 2018-09-19 September 11 Summer
#4 2019-10-24 October 9 Fall

Or it can be done in base R

# Look up by month name (as.character() guards against a factor column,
# which would otherwise index by integer code) and drop the lookup names.
df1$Season <- unname(nm1[as.character(df1$Month)])

data

# Example input: four observations with the date stored as text, the month
# name as text, and an integer temperature.
df1 <- data.frame(
  Date = c("2016-07-01", "2017-01-08", "2018-09-19", "2019-10-24"),
  Month = c("July", "January", "September", "October"),
  Temperature = c(13L, 5L, 11L, 9L),
  stringsAsFactors = FALSE
)

How to extract Month from date in R

?month states:

Date-time must be a POSIXct, POSIXlt, Date, Period, chron, yearmon,
yearqtr, zoo, zooreg, timeDate, xts, its, ti, jul, timeSeries, and fts
objects.

Your object is a factor, not even a character vector (presumably because of stringsAsFactors = TRUE). You have to convert your vector to some datetime class, for instance to POSIXlt:

library(lubridate)
# Dates stored as day/month/year strings
some_date <- c("01/02/1979", "03/04/1980")
# Parse with an explicit day/month/year format, then extract the month number
month(as.POSIXlt(some_date, format="%d/%m/%Y"))
[1] 2 4

There's also a convenience function dmy, that can do the same (tip proposed by @Henrik):

month(dmy(some_date))
[1] 2 4

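Going even further, @IShouldBuyABoat gives another hint that dd/mm/yyyy character formats are accepted without any explicit casting. Note, however, that such strings are then parsed with R's default year/month/day formats rather than as dd/mm/yyyy, so the month comes out right here only because it is the middle field; an explicit format or dmy() is the safer choice: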

month(some_date)
[1] 2 4

For a list of formats, see ?strptime. You'll find that "standard unambiguous format" stands for

The default formats follow the rules of the ISO 8601 international
standard which expresses a day as "2001-02-28" and a time as
"14:01:02" using leading zeroes as here.

How to get the start date of the astronomical season from a date

Since you are using lubridate, you can create a function using floor_date().

# Start date of the (approximate) astronomical season containing each date.
#
# x: a Date, POSIXct or POSIXlt vector.
# Returns a Date vector with one element per element of x. Season starts are
# approximated as the 21st of March, June, September and December; the true
# equinox and solstice dates shift by a day or two between years.
astronomical_floor <- function(x) {
  stopifnot(inherits(x, c("Date", "POSIXt")))

  # Work on calendar days so that the comparison below is Date against Date
  # (comparing a Date with a date-time compares days with seconds)
  x_day <- as.Date(format(x, "%Y-%m-%d"))

  # 21st of the first month of the lubridate "season" that contains d
  season_start <- function(d) {
    d |>
      floor_date("season") |>
      format("%Y-%m-21") |>
      ymd()
  }

  result <- season_start(x_day)

  # Make sure the floor is not greater than the date,
  # e.g. 2022-06-05 should return 2022-03-21, not 2022-06-21.
  # This only happens before the 21st of a season's first month, so stepping
  # back one month always lands in the previous season.
  too_late <- !is.na(result) & result > x_day
  if (any(too_late)) {
    result[too_late] <- season_start(x_day[too_late] %m+% months(-1))
  }

  result
}

x <- ymd_hms("2008-08-03 12:01:59.23")
astronomical_floor(x) # "2008-06-21"
astronomical_floor(as.Date("2020-01-01")) # "2019-12-21"
astronomical_floor(x = ymd("2022-06-05")) # "2022-03-21"

R: assign seasons to dates by day and month

Using $yday (whether from lubridate or as.POSIXlt) may give erroneous results for leap-years. I think a safer method is to create a vector of each of those dates across the years, adding one year in each direction (before/after).

I'm using findInterval, but it's close-enough to cut that you could use that method using the same variables here.

# Every season start date for the years covered by df$DATES, with the date
# range widened by 365 days on each side before the years are taken.
season_dates <- local({
  date_span <- range(df$DATES) + c(-365, 365)
  year_span <- 1900 + as.POSIXlt(date_span)$year  # $year counts from 1900
  years <- seq.int(year_span[1], year_span[2])
  # Month-day suffix of each season start, pasted onto every year
  suffixes <- c("-03-20", "-06-21", "-09-23", "-12-21")
  as.Date(sort(as.vector(outer(years, suffixes, paste0))))
})
season_dates
# [1] "2016-03-20" "2016-06-21" "2016-09-23" "2016-12-21" "2017-03-20" "2017-06-21" "2017-09-23" "2017-12-21" "2018-03-20"
# [10] "2018-06-21" "2018-09-23" "2018-12-21" "2019-03-20" "2019-06-21" "2019-09-23" "2019-12-21" "2020-03-20" "2020-06-21"
# [19] "2020-09-23" "2020-12-21"
# One name per boundary, cycling through the four seasons in date order
season_names <- rep_len(c("Spring", "Summer", "Autumn", "Winter"), length(season_dates))
season_names
# [1] "Spring" "Summer" "Autumn" "Winter" "Spring" "Summer" "Autumn" "Winter" "Spring" "Summer" "Autumn" "Winter" "Spring"
# [14] "Summer" "Autumn" "Winter" "Spring" "Summer" "Autumn" "Winter"

# Fix the random seed so the sampled rows below are reproducible
set.seed(42)
as_tibble(df) %>%
# findInterval() gives, for each date, the position of the latest season
# start that is not after it; that position picks the season name
mutate(SEASON = season_names[ findInterval(DATES, season_dates) ]) %>%
# show 10 random rows, ordered by date
sample_n(10) %>%
arrange(DATES)
# # A tibble: 10 x 2
# DATES SEASON
# <date> <chr>
# 1 2017-01-24 Winter
# 2 2017-02-18 Winter
# 3 2017-06-14 Spring
# 4 2017-11-17 Autumn
# 5 2017-12-22 Winter
# 6 2018-02-14 Winter
# 7 2018-07-15 Summer
# 8 2018-09-14 Summer
# 9 2018-09-26 Autumn
# 10 2019-06-18 Spring

I sampled the output just to show some variance, as otherwise the first 10 results were all winter. Also, I used as.POSIXlt(.)$year and then had to adjust it since it is 1900-based. lubridate::year would work here, too.

How to determine the seasons of the year from a multitemporal data list using R?

1) Use findInterval to look up the date in the season_start vector and extract the associated season_name.

library(dplyr)

# given Date class vector returns vector of season names
# The year starts and ends in "Summer" (January 1 maps to "Summer"), which
# looks like a Southern Hemisphere convention.
date2season <- function(date) {
# first month-day of each interval, in calendar order
season_start <- c("0101", "0321", "0621", "0923", "1221") # mmdd
# name of the season beginning at the matching season_start entry
season_name <- c("Summer", "Autumn", "Winter", "Spring", "Summer")
# month and day of each date as a four-character string, e.g. "0808"
mmdd <- format(date, "%m%d")
# position of the last season_start that is <= mmdd selects the name
# NOTE(review): findInterval() is given character vectors here and appears
# to rely on their conversion to numbers - confirm
season_name[findInterval(mmdd, season_start)] ##
}

df %>% mutate(season = date2season(as.Date(Date_dmy, "%d/%m/%Y")))

giving:

   sample_station   Date_dmy Temperature season
1 A 01/01/2000 17 Summer
2 A 08/08/2000 20 Winter
3 A 16/03/2001 24 Summer
4 A 22/09/2001 19 Winter
5 A 01/06/2002 17 Autumn
...snip...

1a) The last line in date2season, marked ##, could optionally be replaced with

season_name[(mmdd >= "0101") + (mmdd >= "0321") + (mmdd >= "0621") + 
(mmdd >= "0923") + (mmdd >= "1221")]

and in that case you don't need the line defining season_start either.

2) An alternative is to use case_when:

df %>%
# mmdd: month and day of each date as a four-character string, e.g. "0808"
mutate(mmdd = format(as.Date(Date_dmy, "%d/%m/%Y"), "%m%d"),
# conditions are tried in order, so each row gets the first season whose
# last day (as mmdd) it does not exceed; anything later is "Summer" again
season = case_when(
mmdd <= "0320" ~ "Summer",
mmdd <= "0620" ~ "Autumn",
mmdd <= "0922" ~ "Winter",
mmdd <= "1220" ~ "Spring",
TRUE ~ "Summer")) %>%
# drop the helper column
select(-mmdd)

Convert dates to season year

lubridate package is your friend:

library(lubridate)
# Dates as month/day/two-digit-year strings
date <- c("08/08/15", "08/09/16")

# extract year information
# NOTE: the variable `year` has the same name as the year() function it is
# computed with
year <- year(mdy(date))
# paste season as "<year>/<year + 1>"
season <- paste0(year, "/", year + 1)
season

How do I find floor and ceiling rounded dates using lubridate when the dates are in varying orders, but not in date format?

UPDATE WITH SOLUTION:

I have solved this problem myself, using parse_date_time() and the fact that, if a date is not in a pre-defined order, parse_date_time() fails and leaves behind an NA. I create a dataframe that I then use to figure out what format the date is in, and then create the end_date column based on the format.

I am aware that lubridate also features the function guess_formats(), but this is less helpful because the function outputs many possible formats.

Code below:

library(lubridate)
library(tidyverse)

#Create dataframe

species <- c("mudkip", "treecko", "torchic", "swampert", "galvantula", "volcarona", "joltik")
raw_date <- c("5 Jan 2013", "February 2018", "10/2001", "25/12/1984", "2020", "2012-03-12", "not a date")

# Added Volcarona to my species to have an example in which the year is first in the date format
# Added Joltik to represent a species in which the date is blank or otherwise unreadable

df <- data.frame(species, raw_date)

# Create a dataframe for formatting the dataset - you can just add these columns to the main dataframe, but I found this neater
# Each column probes the raw dates against a single candidate order;
# parse_date_time() gives NA where a string does not fit that order, so the
# NA pattern across the columns reveals each date's format. quiet = TRUE
# silences the "failed to parse" warnings these deliberate probes produce.
date_formatting <- data.frame(
  raw_date = df$raw_date,
  ymd = parse_date_time(df$raw_date, orders = "ymd", quiet = TRUE),
  dmy = parse_date_time(df$raw_date, orders = "dmy", quiet = TRUE),
  my = parse_date_time(df$raw_date, orders = "my", quiet = TRUE),
  y = parse_date_time(df$raw_date, orders = "y", quiet = TRUE)
)
# You can use other date formats based on your needs.

# Find what the date format is based on which orders did and didn't parse.
# Conditions are tried in order, so the most specific matching format wins.
date_formatting <- date_formatting %>%
mutate(format = case_when(
!is.na(ymd) ~ "ymd",
!is.na(dmy) ~ "dmy",
!is.na(my) ~ "my",
!is.na(y) ~ "y",
# TRUE to account for dates that can't be read due to format - could leave blank.
TRUE ~ "unreadable"
))

df$format <- date_formatting$format

# Start date works as normal regardless of date format
df$start_date <- parse_date_time(df$raw_date, orders = c("ymd","dmy", "my", "y"))

# For end date, calculate values based on format
df <- df %>%
  mutate(end_date = case_when(
    # A full date is both the start and the end of its own period
    format %in% c("ymd", "dmy") ~
      parse_date_time(raw_date, orders = c("ymd", "dmy"), quiet = TRUE),
    # When going to the end of the year/month, make sure to use %m+% to account for varying lengths of months/years.
    # Subtract 1 day to get the last day of that month/year (rather than the first day of the subsequent month/year)
    format == "my" ~
      (parse_date_time(raw_date, orders = "my", quiet = TRUE) %m+% months(1)) - days(1),
    format == "y" ~
      (parse_date_time(raw_date, orders = "y", quiet = TRUE) %m+% years(1)) - days(1),
    # Unreadable dates get a missing date-time. It is typed as POSIXct (UTC,
    # matching parse_date_time's default) because all branches must share a
    # type; a plain NA_real_ is a double and may be rejected by dplyr
    # versions that type-check case_when() branches.
    TRUE ~ as.POSIXct(NA, tz = "UTC")
  ))

I hope this helps any future people with similar issues! If anyone has a more elegant solution I'd love to know.



Related Topics



Leave a reply



Submit