3  Sunlight Data

Objectives

This notebook explains how to obtain sunlight data from donneespubliques.meteofrance.fr (consult the variable dictionary)

Let us first load {tidyverse}:

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

3.1 Download and extract the weather data

The data is available in monthly summaries. We need to create some function to download those summaries and then extract the information.

#' Downloads the monthly summary of the weather. 
#' Puts the results in `data/meteo/tmp/`
#' 
#' @param lien url of the summary to be downloaded
#' @return returns nothing
#' lien <- paste0(
#'   "https://donneespubliques.meteofrance.fr/donnees_libres/",
#'   "Txt/Climat/climat.201903.csv.gz"
#' )
telecharger_mess <- function(lien) {
  filename <- 
    lien |> 
    str_split("/") |> 
    last() |> 
    last()
  download.file(lien, destfile = str_c("../data/meteo/tmp/", filename))
}

Let us loop over the months from 1990 to 2019. At each iteration, we can download the monthy summary:

pb <- txtProgressBar(min = 1990, max = 2019, style = 2)
for (year in 1990:2019) {
  for (month in 1:12) {
    lien <- 
      str_c(
        "https://donneespubliques.meteofrance.fr/",
        "donnees_libres/Txt/Climat/",
        "climat.", year,
        str_pad(month, width = 2, side = "left", pad = 0), ".csv.gz"
      )
    telecharger_mess(lien)
  }
  setTxtProgressBar(pb = pb, year)
}

We need to get some information relative to each weather station. The list of weather stations is available online. We have downloaded it manually and put it in data/meteo/.

stations <- read_delim(
  "../data/meteo/postesSynop.csv", 
  locale = locale(decimal_mark = "."),
  delim = ";"
)
Rows: 62 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (2): ID, Nom
dbl (3): Latitude, Longitude, Altitude

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stations
# A tibble: 62 × 5
   ID    Nom               Latitude Longitude Altitude
   <chr> <chr>                <dbl>     <dbl>    <dbl>
 1 07005 ABBEVILLE             50.1     1.83        69
 2 07015 LILLE-LESQUIN         50.6     3.10        47
 3 07020 PTE DE LA HAGUE       49.7    -1.94         6
 4 07027 CAEN-CARPIQUET        49.2    -0.456       67
 5 07037 ROUEN-BOOS            49.4     1.18       151
 6 07072 REIMS-PRUNAY          49.2     4.16        95
 7 07110 BREST-GUIPAVAS        48.4    -4.41        94
 8 07117 PLOUMANAC'H           48.8    -3.47        55
 9 07130 RENNES-ST JACQUES     48.1    -1.73        36
10 07139 ALENCON               48.4     0.110      143
# ℹ 52 more rows

Let us list all the downloaded monthly summaries:

N <- list.files(
  "../data/meteo/tmp", pattern = "\\.csv\\.gz", full.names = TRUE
)

We can extract the data from all these files. To that end, let us create a function that will be applied to all these files.

#' Extracts sunlight data from a monthly weather summary
#'
#' @param fichier path to the file to extract data from
#' fichier <- N[1]
extract_data <- function(fichier) {
  mess_clim_mensuel <- read_csv2(gzfile(fichier))
  
  gzfile(fichier) |> 
    read_delim(locale = locale(decimal_mark = "."), delim = ";") |> 
    select(NUM_POSTE, DAT, INST)
  
}

We extract the following columns:

  • NUM_POSTE: OMM ID of the weather station
  • DAT: date of observation (year and month)
  • INST: cumulated sunlight data (in number of hours)

Let us loop over the downloaded files:

df_ensoleillement <- map_df(N, extract_data)

We only keep stations where at least 200 records are available:

stations_to_keep <- 
  df_ensoleillement |> 
  group_by(NUM_POSTE) |> 
  tally() |> 
  arrange(n) |> 
  filter(n > 200)
df_ensoleillement <- 
  df_ensoleillement |> 
  filter(NUM_POSTE %in% stations_to_keep$NUM_POSTE)

Leaving us with:

nrow(df_ensoleillement)
[1] 18163

Let us compute the monthly average:

df_ensoleillement_month <- 
  df_ensoleillement |> 
  mutate(month = lubridate::month(DAT)) |> 
  group_by(NUM_POSTE, month) |> 
  filter(DAT < lubridate::ymd("2011-12-31")) |> 
  summarise(INST = mean(INST, na.rm=TRUE))
`summarise()` has grouped output by 'NUM_POSTE'. You can override using the
`.groups` argument.

And let us add the stations informations:

df_ensoleillement_month <- 
  df_ensoleillement_month |> 
  left_join(stations, by = c("NUM_POSTE" = "ID"))
df_ensoleillement_month
# A tibble: 684 × 7
# Groups:   NUM_POSTE [57]
   NUM_POSTE month  INST Nom       Latitude Longitude Altitude
   <chr>     <dbl> <dbl> <chr>        <dbl>     <dbl>    <dbl>
 1 07005         1  68.4 ABBEVILLE     50.1      1.83       69
 2 07005         2  78.2 ABBEVILLE     50.1      1.83       69
 3 07005         3 128.  ABBEVILLE     50.1      1.83       69
 4 07005         4 181.  ABBEVILLE     50.1      1.83       69
 5 07005         5 205   ABBEVILLE     50.1      1.83       69
 6 07005         6 207   ABBEVILLE     50.1      1.83       69
 7 07005         7 220.  ABBEVILLE     50.1      1.83       69
 8 07005         8 208.  ABBEVILLE     50.1      1.83       69
 9 07005         9 159.  ABBEVILLE     50.1      1.83       69
10 07005        10 114   ABBEVILLE     50.1      1.83       69
# ℹ 674 more rows

3.2 Interpolation of sunlight data: Kriging

We will perform some kriging methods to interpolate the average monthly sunlight to each point of a grid covering Metropolitan France. We load a shapefile of France (from Institut National de l’Information Géographique et Forestière). The data can be downloaded from the French Open platform for French public data: Geofla.

3.2.1 At the level of French “départements”

library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
map_departements <- 
  sf::read_sf("../data/France_departements_ign/DEPARTEMENT.SHP") |> 
  sf::st_transform(crs = 4326)

Let us extract a bounding box for France:

bbox_france <- st_bbox(map_departements)
bbox_france
     xmin      ymin      xmax      ymax 
-5.138996 41.362165  9.559615 51.089001 

Let us keep only weather stations located in Metropolitan France:

df_ensoleillement_month <- 
  df_ensoleillement_month |> 
  filter(
    Longitude >= bbox_france["xmin"],
    Longitude <= bbox_france["xmax"],
    Latitude >= bbox_france["ymin"],
    Latitude <= bbox_france["ymax"]
  )

The coordinates of these weather stations:

coords_stations <- 
  df_ensoleillement_month |> 
  select("Longitude", "Latitude") |> unique() |> 
  st_as_sf(coords = c("Longitude", "Latitude")) |> 
  sf::st_set_crs(4326)
Adding missing grouping variables: `NUM_POSTE`

Let us now define the grid (thanks to Jareck Kotowski for answering his own question on StackOverflow):

size_cell <- .5
long_grid <- seq(bbox_france["xmin"], bbox_france["xmax"], by = size_cell)
lat_grid <- seq(bbox_france["ymin"], bbox_france["ymax"], by = size_cell)
france_grid <- expand.grid(long = long_grid, lat = lat_grid)
head(france_grid)
       long      lat
1 -5.138996 41.36216
2 -4.638996 41.36216
3 -4.138996 41.36216
4 -3.638996 41.36216
5 -3.138996 41.36216
6 -2.638996 41.36216

We need to change this grid to a sf object:

france_grid_sf <- 
  france_grid |> 
  st_as_sf(coords = c("long", "lat"), 
           crs = 4326, agr = "constant")
france_grid_sf
Simple feature collection with 600 features and 0 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -5.138996 ymin: 41.36216 xmax: 9.361004 ymax: 50.86216
Geodetic CRS:  WGS 84
First 10 features:
                      geometry
1   POINT (-5.138996 41.36216)
2   POINT (-4.638996 41.36216)
3   POINT (-4.138996 41.36216)
4   POINT (-3.638996 41.36216)
5   POINT (-3.138996 41.36216)
6   POINT (-2.638996 41.36216)
7   POINT (-2.138996 41.36216)
8   POINT (-1.638996 41.36216)
9   POINT (-1.138996 41.36216)
10 POINT (-0.6389965 41.36216)

Then, it can be converted to a sp object (needed because it is the expected input for krigin function):

france_grid_sp <- as_Spatial(france_grid_sf)

Then, we can interpolate sunlight for each point of the grid. To that end, we use the krige() function from {gstat}. The varigram model is trained using the variogram() function from the same package.

3.2.1.1 Average over a long period

We create a function to perform the estimation for a specific month (we will therefore obtain climate data, as we will give as an input weather data from a long period).

#' Monthy sunlight average for a given month
#' 
#' @param month month mois pour lequel realiser le krigeage
#' month <- 1
monthly_sunshine_grid <- function(month){
  
  # Keep only the month of interest
  df_ensoleillement_month_tmp <- 
    df_ensoleillement_month |> 
    ungroup() |> 
    filter(month == !!month) |> 
    filter(!is.na(INST))
  
  df_ensoleillement_month_tmp_sf <- 
    df_ensoleillement_month_tmp |> 
    st_as_sf(coords = c("Longitude", "Latitude"), 
             crs = 4326, agr = "constant")
  
  df_ensoleillement_month_tmp_sp <- 
    df_ensoleillement_month_tmp_sf |> 
    as_Spatial()
  
  # Variogramme
  variog_fr <- gstat::variogram(INST ~ 1, df_ensoleillement_month_tmp_sp)
  
  # Select variogram model
  variog_fr_fit <-gstat::fit.variogram(
    variog_fr,
    model = gstat::vgm("Gau")
  )
  # attr(variog_fr_fit, "singular")
  
  # Krigeage
  ensol_krigeage <- gstat::krige(
    INST ~ 1,
    df_ensoleillement_month_tmp_sp, france_grid_sp,
    model = variog_fr_fit
  )
  
  ensol_krigeage_df <- 
    ensol_krigeage |> 
    tbl_df() |> 
    rename(
      long = coords.x1, lat = coords.x2,
      ensol = var1.pred, ensol_var = var1.var
    ) |> 
    mutate(mois = !!month)
  
  ensol_krigeage_df
  
}# End of monthly_sunshine_grid()

Let us loop over all month and apply this function to each of them:

ensol_mois_regions <- map_df(1:12, monthly_sunshine_grid)
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]

We can compute the average monthly sunlight over the period 1990-2011 as follows:

ensol_krigeage_sf <- 
  ensol_mois_regions |> 
  group_by(long, lat) |>
  # Cumulative hours
  summarise(ensol = sum(ensol)) |> 
  ungroup() |> 
  mutate(
    geometry = str_c(
        "POLYGON((",
        long, lat, ", ",
        long + size_cell, lat, ", ",
        long + size_cell, lat + size_cell, ", ",
        long, lat + size_cell,", ", 
        long, lat,
        "))",
        sep = " "
    )
  ) |> 
  st_as_sf(coords = c("long", "lat"), wkt = "geometry", crs = 4326)
`summarise()` has grouped output by 'long'. You can override using the
`.groups` argument.
ensol_krigeage_sf
Simple feature collection with 600 features and 3 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -5.138996 ymin: 41.36216 xmax: 9.861004 ymax: 51.36216
Geodetic CRS:  WGS 84
# A tibble: 600 × 4
    long   lat ensol                                                    geometry
 * <dbl> <dbl> <dbl>                                               <POLYGON [°]>
 1 -5.14  41.4 1986. ((-5.138996 41.36216, -4.638996 41.36216, -4.638996 41.862…
 2 -5.14  41.9 1989. ((-5.138996 41.86216, -4.638996 41.86216, -4.638996 42.362…
 3 -5.14  42.4 1995. ((-5.138996 42.36216, -4.638996 42.36216, -4.638996 42.862…
 4 -5.14  42.9 2001. ((-5.138996 42.86216, -4.638996 42.86216, -4.638996 43.362…
 5 -5.14  43.4 2006. ((-5.138996 43.36216, -4.638996 43.36216, -4.638996 43.862…
 6 -5.14  43.9 2007. ((-5.138996 43.86216, -4.638996 43.86216, -4.638996 44.362…
 7 -5.14  44.4 2001. ((-5.138996 44.36216, -4.638996 44.36216, -4.638996 44.862…
 8 -5.14  44.9 1987. ((-5.138996 44.86216, -4.638996 44.86216, -4.638996 45.362…
 9 -5.14  45.4 1963. ((-5.138996 45.36216, -4.638996 45.36216, -4.638996 45.862…
10 -5.14  45.9 1927. ((-5.138996 45.86216, -4.638996 45.86216, -4.638996 46.362…
# ℹ 590 more rows

And then aggregate the values at the French region level:

ensol_regions <- 
  ensol_krigeage_sf |>
  sf::st_join(map_departements, join = st_intersects, left = FALSE) |> 
  sf::st_set_geometry(NULL) |>
  select(CODE_DEPT, ensol) |> 
  group_by(CODE_DEPT) |> 
  summarise(ensol = mean(ensol))
ensol_regions
# A tibble: 96 × 2
   CODE_DEPT ensol
   <chr>     <dbl>
 1 01        1967.
 2 02        1779.
 3 03        1898.
 4 04        2688.
 5 05        2542.
 6 06        2753.
 7 07        2299.
 8 08        1786.
 9 09        2094.
10 10        1840.
# ℹ 86 more rows

Let us visualize the results:

p_ensol <-
  ggplot(data = map_departements |> left_join(ensol_regions)) +
  geom_sf(mapping = aes(fill = ensol)) +
  scale_fill_gradient("Sunlight (hours)", low="yellow", high="red") +
  labs(title = "Cumulative hours of sunshine per year\n(average 1990-2011)")
Joining with `by = join_by(CODE_DEPT)`
p_ensol

3.2.1.2 Sunlight for 2011

Let us look at the cumulative value for 2011:

df_ensoleillement_2011 <- 
  df_ensoleillement |> 
  mutate(month = lubridate::month(DAT), year = lubridate::year(DAT)) |> 
  filter(year == 2011) |> 
  filter(!is.na(INST)) |> 
  group_by(NUM_POSTE) |> 
  mutate(n = n()) |> 
  filter(n == 12) |> 
  group_by(NUM_POSTE) |> 
  summarise(cumul_ensol = sum(INST, na.rm=TRUE)) |> 
  left_join(stations, by = c("NUM_POSTE" = "ID"))
df_ensoleillement_2011
# A tibble: 41 × 6
   NUM_POSTE cumul_ensol Nom                 Latitude Longitude Altitude
   <chr>           <dbl> <chr>                  <dbl>     <dbl>    <dbl>
 1 07005            1591 ABBEVILLE               50.1     1.83        69
 2 07015            1705 LILLE-LESQUIN           50.6     3.10        47
 3 07027            1905 CAEN-CARPIQUET          49.2    -0.456       67
 4 07037            1611 ROUEN-BOOS              49.4     1.18       151
 5 07110            1610 BREST-GUIPAVAS          48.4    -4.41        94
 6 07130            1935 RENNES-ST JACQUES       48.1    -1.73        36
 7 07139            1821 ALENCON                 48.4     0.110      143
 8 07149            1882 ORLY                    48.7     2.38        89
 9 07168            2014 TROYES-BARBEREY         48.3     4.02       112
10 07190            1898 STRASBOURG-ENTZHEIM     48.5     7.64       150
# ℹ 31 more rows

Filtering the results to keep only stations in Metropolitan France:

df_ensoleillement_2011 <- 
  df_ensoleillement_2011 |> 
  filter(
    Longitude >= bbox_france["xmin"],
    Longitude <= bbox_france["xmax"],
    Latitude >= bbox_france["ymin"],
    Latitude <= bbox_france["ymax"]
  )

The coordinates of weather stations:

coords_stations_2011 <- 
  df_ensoleillement_2011 |> 
  select("Longitude", "Latitude") |> unique() |> 
  st_as_sf(coords = c("Longitude", "Latitude")) |> 
  sf::st_set_crs(4326)

And a graph of where those stations are located:

p_loc_stations_2011 <- 
  ggplot() +
  geom_sf(data = map_departements) +
  scale_fill_discrete(guide = FALSE) +
  geom_sf(data = coords_stations_2011) +
  labs(title = "Location of weather stations in 2011")
p_loc_stations_2011
Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
ggplot2 3.3.4.
ℹ Please use "none" instead.

Preparation of the sp object for the grid covering France:

france_grid_sf <- 
  france_grid |> 
  st_as_sf(coords = c("long", "lat"), 
           crs = 4326, agr = "constant")
france_grid_sp <- as_Spatial(france_grid_sf)

Let us turn df_ensoleillement_2011 to an sf object, and then as an sp one:

df_ensoleillement_2011_sf <- 
  df_ensoleillement_2011 |> 
  st_as_sf(coords = c("Longitude", "Latitude"), 
           crs = 4326, agr = "constant")
df_ensoleillement_2011_sp <- 
  df_ensoleillement_2011_sf |> 
  as_Spatial()

Now let us turn to the interpolation. First, let us fit a variogram model to the data:

variog_fr <- 
  gstat::variogram(cumul_ensol ~ 1, df_ensoleillement_2011_sp)
plot(variog_fr, t="b")

Let us select a variogram model:

variog_fr_fit <- gstat::fit.variogram(variog_fr, model = gstat::vgm("Gau"))

Checking whether the algorithm converged:

attr(variog_fr_fit, "singular")
[1] FALSE

Quick graph of the fit:

plot(variog_fr, variog_fr_fit, main = 'Gaussian model')

Now let us interpolate the data at each point of the grid:

ensol_krigeage <- 
  gstat::krige(cumul_ensol ~ 1,
               df_ensoleillement_2011_sp, france_grid_sp,
               model=variog_fr_fit)
[using ordinary kriging]

Let us turn the results into a tibble:

ensol_krigeage_df_2011 <- 
  ensol_krigeage |> 
  tbl_df() |> 
  rename(long = coords.x1, lat = coords.x2,
         ensol = var1.pred, ensol_var = var1.var)
Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
ℹ Please use `tibble::as_tibble()` instead.
ensol_krigeage_df_2011
# A tibble: 600 × 4
     long   lat ensol ensol_var
    <dbl> <dbl> <dbl>     <dbl>
 1 -5.14   41.4 1721.   141302.
 2 -4.64   41.4 1726.   123759.
 3 -4.14   41.4 1736.   107596.
 4 -3.64   41.4 1752.    92994.
 5 -3.14   41.4 1774.    80068.
 6 -2.64   41.4 1803.    68871.
 7 -2.14   41.4 1840.    59390.
 8 -1.64   41.4 1885.    51553.
 9 -1.14   41.4 1938.    45242.
10 -0.639  41.4 1999.    40306.
# ℹ 590 more rows

Let us create an sf object from these results:

ensol_krigeage_2011_sf <- 
  ensol_krigeage_df_2011 |> 
  group_by(long, lat) |>
  # Cumulative hours
  summarise(ensol = sum(ensol)) |> 
  ungroup() |> 
  mutate(
    geometry = 
      str_c(
        "POLYGON((",
        long, lat, ", ",
        long + size_cell, lat, ", ",
        long + size_cell, lat + size_cell, ", ",
        long, lat + size_cell,", ", 
        long, lat,
        "))",
        sep = " "
      )) |> 
  st_as_sf(coords = c("long", "lat"), wkt = "geometry",
           crs = 4326)
`summarise()` has grouped output by 'long'. You can override using the
`.groups` argument.

And then we can get the cumulative sunlight over the year, by French département:

ensol_regions_2011 <- 
  ensol_krigeage_2011_sf |>
  sf::st_join(map_departements, join = st_intersects, left = FALSE) |> 
  sf::st_set_geometry(NULL) |>
  select(CODE_DEPT, ensol) |> 
  group_by(CODE_DEPT) |> 
  summarise(ensol = mean(ensol))

And now the results can be graphed:

p_2011 <- ggplot(data = map_departements |> left_join(ensol_regions_2011)) +
  geom_sf(aes(fill = ensol)) +
  scale_fill_gradient("Sunlight (hours)", low="yellow", high="red") +
  labs(title = "Cumulated hours of sunshine (2011)")
Joining with `by = join_by(CODE_DEPT)`
p_2011

3.2.2 At the level of French “régions”

Now let us interpolate sunlight over the French Regions (with their geographical boundaries as of 2014).

load("../data/map_regions_2014.rda")

The bounding box:

bbox_france <- st_bbox(map_regions_2014)
old-style crs object detected; please recreate object with a recent sf::st_crs()

Keeping only weather stations from Metropolitan France:

df_ensoleillement_month <- 
  df_ensoleillement_month |> 
  filter(Longitude >= bbox_france["xmin"],
         Longitude <= bbox_france["xmax"],
         Latitude >= bbox_france["ymin"],
         Latitude <= bbox_france["ymax"])

Coordinates of those stations:

coords_stations <- 
  df_ensoleillement_month |> 
  select("Longitude", "Latitude") |> unique() |> 
  st_as_sf(coords = c("Longitude", "Latitude")) |> 
  sf::st_set_crs(4326)
Adding missing grouping variables: `NUM_POSTE`
coords_stations
Simple feature collection with 40 features and 1 field
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -4.412 ymin: 41.918 xmax: 9.485167 ymax: 50.57
Geodetic CRS:  WGS 84
# A tibble: 40 × 2
# Groups:   NUM_POSTE [40]
   NUM_POSTE             geometry
 * <chr>              <POINT [°]>
 1 07005           (1.834 50.136)
 2 07015           (3.0975 50.57)
 3 07020     (-1.939833 49.72517)
 4 07027        (-0.456167 49.18)
 5 07037        (1.181667 49.383)
 6 07110        (-4.412 48.44417)
 7 07117     (-3.473167 48.82583)
 8 07130        (-1.734 48.06883)
 9 07139       (0.110167 48.4455)
10 07149      (2.384333 48.71683)
# ℹ 30 more rows

The kriging wraping functions:

#' Average Monthly sunlight for a given month (interpolated with kriging methods)
#' at the French Region level
#' @param month month to select for kriging
#' month <- 1
monthly_sunshine_grid <- function(month){
  
  # Keep only the month of interest
  df_ensoleillement_month_tmp <- 
    df_ensoleillement_month |> 
    ungroup() |> 
    filter(month == !!month) |> 
    filter(!is.na(INST))
  
  df_ensoleillement_month_tmp_sf <- 
    df_ensoleillement_month_tmp |> 
    st_as_sf(coords = c("Longitude", "Latitude"), 
             crs = 4326, agr = "constant")
  
  df_ensoleillement_month_tmp_sp <- 
    df_ensoleillement_month_tmp_sf |> 
    as_Spatial()
  
  # Variogram
  variog_fr <- 
    gstat::variogram(INST ~ 1, df_ensoleillement_month_tmp_sp)
  
  # Select variogram model
  variog_fr_fit <- gstat::fit.variogram(
    variog_fr,
    model = gstat::vgm("Gau")
  )
  attr(variog_fr_fit, "singular")
  
  # Kriging
  ensol_krigeage <- gstat::krige(INST ~ 1,
                                 df_ensoleillement_month_tmp_sp, france_grid_sp,
                                 model=variog_fr_fit)
  
  ensol_krigeage_df <- 
    ensol_krigeage |> 
    tbl_df() |> 
    rename(long = coords.x1, lat = coords.x2,
           ensol = var1.pred, ensol_var = var1.var) |> 
    mutate(mois = !!month)
  
  ensol_krigeage_df
  
}# End of monthly_sunshine_grid()

Looping over the months:

ensol_mois_regions <- map_df(1:12, monthly_sunshine_grid)
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]
[using ordinary kriging]

3.2.2.1 Average over a long period

Annual average of cumulative over the 1990-2011 period:

ensol_krigeage_sf <- 
  ensol_mois_regions |> 
  group_by(long, lat) |>
  # Cumulative hours
  summarise(ensol = sum(ensol)) |> 
  ungroup() |> 
  mutate(
    geometry = 
      str_c(
        "POLYGON((",
        long, lat, ", ",
        long + size_cell, lat, ", ",
        long + size_cell, lat + size_cell, ", ",
        long, lat + size_cell,", ", 
        long, lat,
        "))",
        sep = " "
      )) |> 
  st_as_sf(coords = c("long", "lat"), wkt = "geometry",
           crs = 4326)
`summarise()` has grouped output by 'long'. You can override using the
`.groups` argument.

Values on a tibble, each line representing a French Region:

ensol_regions <- 
  ensol_krigeage_sf |>
  sf::st_join(map_regions_2014, join = st_intersects, left = FALSE) |> 
  sf::st_set_geometry(NULL) |>
  select(code_insee, ensol) |> 
  group_by(code_insee) |> 
  summarise(ensol = mean(ensol))
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()
ensol_regions
# A tibble: 22 × 2
   code_insee ensol
   <chr>      <dbl>
 1 11         1806.
 2 21         1809.
 3 22         1745.
 4 23         1708.
 5 24         1844.
 6 25         1734.
 7 26         1851.
 8 31         1688.
 9 41         1787.
10 42         1777.
# ℹ 12 more rows

Visualize on a map:

p_ensol <-
  ggplot(data = map_regions_2014 |> 
  left_join(ensol_regions)) +
  geom_sf(aes(fill = ensol)) +
  scale_fill_gradient("Sunlight (hours)", low="yellow", high="red") +
  labs(title = "Cumulative hours of sunshine per year\n(average 1990-2011)")
Joining with `by = join_by(code_insee)`
old-style crs object detected; please recreate object with a recent
sf::st_crs()
p_ensol

3.2.2.2 Sunlight for 2011

df_ensoleillement_2011 <- 
  df_ensoleillement |> 
  mutate(month = lubridate::month(DAT), year = lubridate::year(DAT)) |> 
  filter(year == 2011) |> 
  filter(!is.na(INST)) |> 
  group_by(NUM_POSTE) |> 
  mutate(n = n()) |> 
  filter(n == 12) |> 
  group_by(NUM_POSTE) |> 
  summarise(cumul_ensol = sum(INST, na.rm=TRUE)) |> 
  left_join(stations, by = c("NUM_POSTE" = "ID"))
df_ensoleillement_2011
# A tibble: 41 × 6
   NUM_POSTE cumul_ensol Nom                 Latitude Longitude Altitude
   <chr>           <dbl> <chr>                  <dbl>     <dbl>    <dbl>
 1 07005            1591 ABBEVILLE               50.1     1.83        69
 2 07015            1705 LILLE-LESQUIN           50.6     3.10        47
 3 07027            1905 CAEN-CARPIQUET          49.2    -0.456       67
 4 07037            1611 ROUEN-BOOS              49.4     1.18       151
 5 07110            1610 BREST-GUIPAVAS          48.4    -4.41        94
 6 07130            1935 RENNES-ST JACQUES       48.1    -1.73        36
 7 07139            1821 ALENCON                 48.4     0.110      143
 8 07149            1882 ORLY                    48.7     2.38        89
 9 07168            2014 TROYES-BARBEREY         48.3     4.02       112
10 07190            1898 STRASBOURG-ENTZHEIM     48.5     7.64       150
# ℹ 31 more rows

Metropolitan French stations only:

df_ensoleillement_2011 <- 
  df_ensoleillement_2011 |> 
  filter(Longitude >= bbox_france["xmin"],
         Longitude <= bbox_france["xmax"],
         Latitude >= bbox_france["ymin"],
         Latitude <= bbox_france["ymax"])

Their coordinates:

coords_stations_2011 <- 
  df_ensoleillement_2011 |> 
  select("Longitude", "Latitude") |> unique() |> 
  st_as_sf(coords = c("Longitude", "Latitude")) |> 
  sf::st_set_crs(4326)

Visualize their locations:

p_loc_stations_2011 <- 
  ggplot() +
  geom_sf(data = map_regions_2014) +
  scale_fill_discrete(guide = FALSE) +
  geom_sf(data = coords_stations_2011) +
  labs(title = "Location of weather stations in 2011")
p_loc_stations_2011
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()

Preparing the sp object for kriging:

df_ensoleillement_2011_sf <- 
  df_ensoleillement_2011 |> 
  st_as_sf(coords = c("Longitude", "Latitude"), 
           crs = 4326, agr = "constant")
df_ensoleillement_2011_sp <- 
  df_ensoleillement_2011_sf |> 
  as_Spatial()

The variogram model can be fitted:

variog_fr <- 
  gstat::variogram(cumul_ensol ~ 1, df_ensoleillement_2011_sp)

Plotting the results:

plot(variog_fr, t="b")

Selection of the best fit:

variog_fr_fit <- gstat::fit.variogram(variog_fr, model = gstat::vgm("Gau"))

Checking whether the algorithm converged:

attr(variog_fr_fit, "singular")
[1] FALSE

Kriging:

ensol_krigeage <- gstat::krige(
  cumul_ensol ~ 1,
  df_ensoleillement_2011_sp, france_grid_sp,
  model=variog_fr_fit
)
[using ordinary kriging]

The interpolated values can be stored on a tibble:

ensol_krigeage_df_2011 <- 
  ensol_krigeage |> 
  tbl_df() |> 
  rename(
    long = coords.x1, lat = coords.x2,
    ensol = var1.pred, ensol_var = var1.var
  )
Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
ℹ Please use `tibble::as_tibble()` instead.
ensol_krigeage_df_2011
# A tibble: 600 × 4
     long   lat ensol ensol_var
    <dbl> <dbl> <dbl>     <dbl>
 1 -5.14   41.4 1721.   141302.
 2 -4.64   41.4 1726.   123759.
 3 -4.14   41.4 1736.   107596.
 4 -3.64   41.4 1752.    92994.
 5 -3.14   41.4 1774.    80068.
 6 -2.64   41.4 1803.    68871.
 7 -2.14   41.4 1840.    59390.
 8 -1.64   41.4 1885.    51553.
 9 -1.14   41.4 1938.    45242.
10 -0.639  41.4 1999.    40306.
# ℹ 590 more rows

And those can be put on an sf object:

ensol_krigeage_2011_sf <- 
  ensol_krigeage_df_2011 |> 
  group_by(long, lat) |>
  # Cumul heures
  summarise(ensol = sum(ensol)) |> 
  ungroup() |> 
  mutate(
    geometry = 
      str_c(
        "POLYGON((",
        long, lat, ", ",
        long + size_cell, lat, ", ",
        long + size_cell, lat + size_cell, ", ",
        long, lat + size_cell,", ", 
        long, lat,
        "))",
        sep = " "
      )) |> 
  st_as_sf(coords = c("long", "lat"), wkt = "geometry",
           crs = 4326)
`summarise()` has grouped output by 'long'. You can override using the
`.groups` argument.

The average value at the region level, for 2011:

ensol_regions_2011 <- 
  ensol_krigeage_2011_sf |>
  sf::st_join(map_regions_2014, join = st_intersects, left = FALSE) |> 
  sf::st_set_geometry(NULL) |>
  select(code_insee, ensol) |> 
  group_by(code_insee) |> 
  summarise(ensol = mean(ensol))
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()
old-style crs object detected; please recreate object with a recent sf::st_crs()
ensol_regions_2011
# A tibble: 22 × 2
   code_insee ensol
   <chr>      <dbl>
 1 11         1876.
 2 21         1880.
 3 22         1771.
 4 23         1790.
 5 24         2004.
 6 25         1819.
 7 26         2102.
 8 31         1681.
 9 41         1915.
10 42         1991.
# ℹ 12 more rows

Choropleth map of these values:

p_2011 <- 
  ggplot(data = map_regions_2014 |> left_join(ensol_regions_2011)) +
  geom_sf(aes(fill = ensol)) +
  scale_fill_gradient("Sunlight (hours)", low="yellow", high="red") +
  labs(title = "Cumulated hours of sunshine (2011)")
Joining with `by = join_by(code_insee)`
old-style crs object detected; please recreate object with a recent
sf::st_crs()
p_2011

Lastly, these results can be saved into an RData object:

ensol_regions <- 
  ensol_regions |> 
  rename(ensol_1990_2011 = ensol) |> 
  left_join(ensol_regions_2011 |> rename(ensol_2011 = ensol))
ensol_regions

save(ensol_regions, file = "../data/meteo/ensol_regions.rda")