Raw data cleaning and processing to generate Health Canada diet simulation data

Author

Didier Brassard

Published

January 23, 2025

#### Set-up and Library ####
# *********************************************************************** #
#                           Set-up and Library                            #
# *********************************************************************** #
# ********************************************** #
#                    Library                     #
# ********************************************** #

## data
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidylog)
Warning: package 'tidylog' was built under R version 4.3.2

Attaching package: 'tidylog'
The following objects are masked from 'package:dplyr':

    add_count, add_tally, anti_join, count, distinct, distinct_all,
    distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
    full_join, group_by, group_by_all, group_by_at, group_by_if,
    inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
    relocate, rename, rename_all, rename_at, rename_if, rename_with,
    right_join, sample_frac, sample_n, select, select_all, select_at,
    select_if, semi_join, slice, slice_head, slice_max, slice_min,
    slice_sample, slice_tail, summarise, summarise_all, summarise_at,
    summarise_if, summarize, summarize_all, summarize_at, summarize_if,
    tally, top_frac, top_n, transmute, transmute_all, transmute_at,
    transmute_if, ungroup
The following object is masked from 'package:stats':

    filter
library(readxl)
library(janitor)

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(hefi2019) # Install with <devtools::install_github("didierbrassard/hefi2019")>

## presentation
library(gt)
Warning: package 'gt' was built under R version 4.3.3
## project
library(here)
here() starts at /Users/DidierBrassard/RStudio/NuAge_protocol
## suppress scientific notation
options(scipen = 9999)
# ********************************************** #
#                  Directories                   #
# ********************************************** #

## Local directory
# All project paths are resolved relative to the project root found by `here`.
dir_scripts <- here::here("scripts")
dir_metadata <- here::here("data", "metadata")
dir_processed <- here::here("data", "processed")
dir_raw <- here::here("data", "raw")
dir_results <- here::here("data", "results")
dir_temp <- here::here("data", "temp")

# Create the folders this script writes to when they are missing.
# `recursive = TRUE` also creates a missing parent ("data"); `dir_raw` is
# included because the raw workbooks are downloaded into it further below.
invisible(
  lapply(
    c(dir_raw, dir_temp),
    function(path) {
      if (!dir.exists(path)) {
        dir.create(path, recursive = TRUE)
      }
    }
  )
)
# ********************************************** #
#            Load functions and data             #
# ********************************************** #

source(file.path(dir_scripts, "save_and_summarize_data.R"))
#### Preparing Health Canada diet simulation ####
# *********************************************************************** #
#                Preparing Health Canada diet simulation                  #
# *********************************************************************** #

# Objective:
# 1) read and format simulation data;
# 2) output table of dietary constituents and nutrients (1 diet = wide format)
##### 1) Download/read data #####
# ********************************************** #
#               Download/read data               #
# ********************************************** #

# Omnivore
# Download the workbook only when no local copy exists; otherwise report the
# modification time of the existing file.
if (!file.exists(file.path(dir_raw, "HC_simulated_omni.xlsx"))) {
  # mode = "wb": an .xlsx file is binary; the default text mode of
  # download.file() can corrupt it on Windows
  download.file(
    "https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be/resource/94064331-b1b9-41f5-9ba8-1bf77b880053/download/simulated-composite-diets-omnivore.xlsx",
    destfile = file.path(dir_raw, "HC_simulated_omni.xlsx"),
    mode = "wb"
  )
} else {
  message("dir_raw/HC_simulated_omni.xlsx found. Date modified:", file.info(file.path(dir_raw,"HC_simulated_omni.xlsx"))$mtime)
}
dir_raw/HC_simulated_omni.xlsx found. Date modified:2024-05-29 14:50:11.318581
# Omnivore, no beverages
# Download the workbook only when no local copy exists; otherwise report the
# modification time of the existing file.
if (!file.exists(file.path(dir_raw, "HC_simulated_omni_nobev.xlsx"))) {
  # mode = "wb": an .xlsx file is binary; the default text mode of
  # download.file() can corrupt it on Windows
  download.file(
    "https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be/resource/66919cac-5d2d-4617-be01-9faafd539f2f/download/simulated-composite-diets-omnivore-no-healthy-beverages.xlsx",
    destfile = file.path(dir_raw, "HC_simulated_omni_nobev.xlsx"),
    mode = "wb"
  )
} else {
  message("dir_raw/HC_simulated_omni_nobev.xlsx found. Date modified:", file.info(file.path(dir_raw,"HC_simulated_omni_nobev.xlsx"))$mtime)
}
dir_raw/HC_simulated_omni_nobev.xlsx found. Date modified:2024-05-29 14:50:11.322398
# Lacto-ovo-vege
# Download the workbook only when no local copy exists; otherwise report the
# modification time of the existing file.
if (!file.exists(file.path(dir_raw, "HC_simulated_vege.xlsx"))) {
  # mode = "wb": an .xlsx file is binary; the default text mode of
  # download.file() can corrupt it on Windows
  download.file(
    "https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be/resource/3813380e-9978-4d19-995b-c4617ad58019/download/simulated-composite-diets-lacto-ovo-vegetarian.xlsx",
    destfile = file.path(dir_raw, "HC_simulated_vege.xlsx"),
    mode = "wb"
  )
} else {
  message("dir_raw/HC_simulated_vege.xlsx found. Date modified:", file.info(file.path(dir_raw,"HC_simulated_vege.xlsx"))$mtime)
}
dir_raw/HC_simulated_vege.xlsx found. Date modified:2024-05-29 14:50:11.325357
##### 2) Function to read and output data #####
# ********************************************** #
#        Function to read and output data        #
# ********************************************** #

#' Read Nutrient Data and Output
#'
#' Reads diet simulation data from one sheet of an Excel file, performs some
#' cleaning operations, and outputs a list containing serving data, total
#' nutrients, and composite food-specific nutrients.
#'
#' @param hc_file Character string, the name of the Excel file (looked up in
#'   `data_dir`).
#' @param dri_group Character string, the sheet name in the Excel file.
#' @param total_rownb Numeric, the total number of rows to read from the table
#'   in the dri_group-specific sheet. The serving and composite tables are
#'   read up to row `total_rownb - 1`; the total-nutrient table is read up to
#'   row `total_rownb`.
#' @param data_dir Character string, folder that contains `hc_file`. Defaults
#'   to the script-level `dir_raw`, which was previously read implicitly.
#'
#' @return A list of three data frames, in this order (also accessible by
#'   name): `serving` (columns `COMPOSITE`, `NB_RA`), `total_nut` (cells
#'   E:AJ, named after the variable names found in row 2), and
#'   `composite_nut` (`NB_RA`, `COMPOSITE`, then the nutrient columns).
#'
#' @examples
#' \dontrun{
#' read_n_output("HC_simulated_omni.xlsx", dri_group = "M 51-70",
#'               total_rownb = 19)
#' }
#'
#' @import readxl
#' @import janitor
#' @import stringr
#' @import dplyr
#'
#' @export

read_n_output <- function(hc_file, dri_group, total_rownb = 20,
                          data_dir = dir_raw) {

  # Fail early with a clear message instead of a low-level readxl error
  xlsx_path <- file.path(data_dir, hc_file)
  if (!file.exists(xlsx_path)) {
    stop("File not found: ", xlsx_path, call. = FALSE)
  }
  # The ranges below start at row 3, so at least row 4 must be requested
  stopifnot(
    is.numeric(total_rownb),
    length(total_rownb) == 1L,
    total_rownb >= 4
  )

  message("Reading: ", hc_file, "; sheet: ", dri_group)

  # Helper function for reading one cell range of the requested sheet.
  # The first row of each range is consumed as the header row (readxl
  # default).
  read_excel_data <- function(range) {
    readxl::read_excel(
      xlsx_path,
      sheet = dri_group,
      range = range
    )
  }

  # Composite servings (columns A:C; the third column is discarded)
  cat("Composite food serving ...\n")
  serving <- read_excel_data(paste0("A3:C", total_rownb - 1)) |>
    janitor::clean_names() |>
    select(-x3) |>
    setNames(c("NB_RA", "COMPOSITE")) |>
    mutate(COMPOSITE = stringr::str_squish(COMPOSITE)) |>
    select(COMPOSITE, NB_RA)

  # Total nutrients: variable names are taken from row 2, values from the
  # rows below the (unnamed) header row 3
  cat("Total nutrient ...\n")
  varnames_nut <- read_excel_data("E2:AJ3") |> names()
  total_nut <- read_excel_data(paste0("E3:AJ", total_rownb)) |>
    setNames(varnames_nut)

  # Composite food-specific nutrients (columns A:AJ, empty columns dropped)
  cat("Composite food-specific nutrients ...\n")
  composite_raw <- read_excel_data(paste0("A3:AJ", total_rownb - 1)) |>
    janitor::remove_empty(which = "cols")
  composite_names <- c("NB_RA", "COMPOSITE", varnames_nut)
  # Names are assigned by position: stop if the number of non-empty columns
  # does not match, rather than mislabel nutrients
  if (ncol(composite_raw) != length(composite_names)) {
    stop(
      "Sheet '", dri_group, "': expected ", length(composite_names),
      " non-empty columns in A:AJ but found ", ncol(composite_raw), ".",
      call. = FALSE
    )
  }
  composite_nut <- composite_raw |>
    setNames(composite_names) |>
    mutate(COMPOSITE = stringr::str_squish(COMPOSITE))

  # Element order is unchanged, so positional access ([[1]], [[2]], [[3]])
  # keeps working
  list(
    serving       = serving,
    total_nut     = total_nut,
    composite_nut = composite_nut
  )
}
##### 3) Apply function to each DRI group #####
# ********************************************** #
#        Apply function to each DRI group        #
# ********************************************** #

# note: using omnivore diets with beverages, as these most likely reflect the diet of the majority of Canadians

# Workbook read for every DRI group below
hc_file <- "HC_simulated_omni.xlsx"

# Sheet "M 51-70" (drig = 12); table read up to worksheet row 19
drig12 <- read_n_output(hc_file, dri_group = "M 51-70", total_rownb = 19)
Reading: HC_simulated_omni.xlsx; sheet: M 51-70
Composite food serving ...
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
select: dropped one variable (x3)
mutate: no changes
select: columns reordered (COMPOSITE, NB_RA)
Total nutrient ...
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
Composite food-specific nutrients ...
New names:
mutate: no changes
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
• `` -> `...33`
• `` -> `...34`
• `` -> `...35`
• `` -> `...36`
# Sheet "M 70 +" (drig = 14); table read up to worksheet row 22
drig14 <- read_n_output(hc_file, dri_group = "M 70 +", total_rownb = 22)
Reading: HC_simulated_omni.xlsx; sheet: M 70 +
Composite food serving ...
New names:
select: dropped one variable (x3)
mutate: no changes
select: columns reordered (COMPOSITE, NB_RA)
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
Total nutrient ...
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
Composite food-specific nutrients ...
New names:
mutate: no changes
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
• `` -> `...33`
• `` -> `...34`
• `` -> `...35`
• `` -> `...36`
# Sheet "F 51-70" (drig = 13); table read up to worksheet row 20
drig13 <- read_n_output(hc_file, dri_group = "F 51-70", total_rownb = 20)
Reading: HC_simulated_omni.xlsx; sheet: F 51-70
Composite food serving ...
New names:
select: dropped one variable (x3)
mutate: no changes
select: columns reordered (COMPOSITE, NB_RA)
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
Total nutrient ...
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
Composite food-specific nutrients ...
New names:
mutate: no changes
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
• `` -> `...33`
• `` -> `...34`
• `` -> `...35`
• `` -> `...36`
# Sheet "F 70 +" (drig = 15); table read up to worksheet row 20
drig15 <- read_n_output(hc_file, dri_group = "F 70 +", total_rownb = 20)
Reading: HC_simulated_omni.xlsx; sheet: F 70 +
Composite food serving ...
New names:
select: dropped one variable (x3)
mutate: no changes
select: columns reordered (COMPOSITE, NB_RA)
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
Total nutrient ...
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
Composite food-specific nutrients ...
New names:
mutate: no changes
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
• `` -> `...33`
• `` -> `...34`
• `` -> `...35`
• `` -> `...36`
##### 4) Calculate total intakes for each DRI group #####
# ********************************************** #
#   Calculate total intakes for each DRI group   #
# ********************************************** #

#' Calculate DRI group Nutrients
#'
#' Calculates the nutrient summary for a specific DRI group. The summary is
#' the last row of the total-nutrient table (second element of `drig`),
#' restricted to `nut_list`. `FAM` and `FAP` are then replaced by the sum of
#' their values over all rows except the last one, and a `drig` identifier
#' column is added.
#'
#' @param drig A list containing DRI group data (as returned by
#'   `read_n_output()`); only its second element is used.
#' @param drig_suffix Numeric, the DRI group suffix (e.g., 12).
#' @param nut_list Character, vector of nutrients (i.e., variable names) for
#'   which totals should be calculated.
#'
#' @return A one-row data frame containing the calculated nutrient summary
#'   for the specified DRI group, plus the `drig` column.
#'
#' @import dplyr
#'
#' @export

calculate_drig_nutrients <- function(drig, drig_suffix, nut_list = c("EKC","FSUG","FAS","FAM","FAP","SOD","PRO")) {

  stopifnot(is.list(drig), length(drig) >= 2L)
  total_nut <- drig[[2]]

  # Every row except the last one (the last row is the one kept as total).
  # The previous `1:nrow(x)-1` parsed as `(1:nrow(x)) - 1`, i.e. 0:(n - 1),
  # and only selected the intended rows because index 0 is silently dropped.
  all_but_total <- utils::head(total_nut, -1L)

  drig_nut <-
    total_nut |>
    select(all_of(nut_list)) |>
    slice_tail() |>
    mutate(
      # add missing nutrients: recomputed as the sum over the non-total rows
      FAM = sum(all_but_total$FAM, na.rm = TRUE),
      FAP = sum(all_but_total$FAP, na.rm = TRUE),
      # add drig id
      drig = drig_suffix
    )

  drig_nut
}

# ************************** #
#   Apply function on list   #
# ************************** #

# Lists are paired by position: element i of `drig_all` receives element i of
# `drig_suffix_all`.
drig_all <- list(drig12, drig14, drig13, drig15)
drig_suffix_all <- c(12, 14, 13, 15)

# One nutrient-summary row per DRI group, stacked into a single data frame
drig_nut_all <- purrr::reduce(
  Map(calculate_drig_nutrients, drig_all, drig_suffix_all),
  rbind
)
select: dropped 25 variables (CAR, FI, SUG, FAT, FAL, …)
slice_tail: removed 15 rows (94%), one row remaining
mutate: changed one value (100%) of 'FAM' (one fewer NA)
        changed one value (100%) of 'FAP' (one fewer NA)
        new variable 'drig' (double) with one unique value and 0% NA
select: dropped 25 variables (CAR, FI, SUG, FAT, FAL, …)
slice_tail: removed 18 rows (95%), one row remaining
mutate: changed one value (100%) of 'FAM' (one fewer NA)
        changed one value (100%) of 'FAP' (one fewer NA)
        new variable 'drig' (double) with one unique value and 0% NA
select: dropped 25 variables (CAR, FI, SUG, FAT, FAL, …)
slice_tail: removed 16 rows (94%), one row remaining
mutate: new variable 'drig' (double) with one unique value and 0% NA
select: dropped 25 variables (CAR, FI, SUG, FAT, FAL, …)
slice_tail: removed 16 rows (94%), one row remaining
mutate: changed one value (100%) of 'FAM' (one fewer NA)
        changed one value (100%) of 'FAP' (one fewer NA)
        new variable 'drig' (double) with one unique value and 0% NA
# ************************** #
#   Males, 51-70: drig=12    #
# ************************** #

# Serving table (first list element) tagged with its DRI group id
drig12_ra <- mutate(drig12[[1]], drig = 12)
mutate: new variable 'drig' (double) with one unique value and 0% NA
# ************************** #
#    Males, 71 +: drig=14    #
# ************************** #

# Serving table (first list element) tagged with its DRI group id
drig14_ra <- mutate(drig14[[1]], drig = 14)
mutate: new variable 'drig' (double) with one unique value and 0% NA
# ************************** #
#  Females, 51-70: drig=13   #
# ************************** #

# Serving table (first list element) tagged with its DRI group id
drig13_ra <- mutate(drig13[[1]], drig = 13)
mutate: new variable 'drig' (double) with one unique value and 0% NA
# ************************** #
#  Females, 71 +: drig=15  #
# ************************** #

# Serving table (first list element) tagged with its DRI group id
drig15_ra <- mutate(drig15[[1]], drig = 15)
mutate: new variable 'drig' (double) with one unique value and 0% NA
##### 5) Append data together #####
# ********************************************** #
#              Append data together              #
# ********************************************** #

# Stack the four serving tables, keep the rows whose COMPOSITE label matches
# one of the six HEFI-2019 patterns, reshape to one row per DRI group and
# attach the nutrient totals.
dietsim_bydrig <-
  rbind(drig12_ra, drig14_ra, drig13_ra, drig15_ra) |>
  mutate(
    # HEFI-2019 classification; rows matching no pattern are left as NA
    hefi2019subgrp = case_when(
      grepl("TOTAL VEGETABLES", COMPOSITE)  ~ "vf",
      grepl("TOTAL WHOLE", COMPOSITE)       ~ "wg",
      grepl("ANIMAL-BASED", COMPOSITE)      ~ "pfab",
      grepl("PLANT-BASED", COMPOSITE)       ~ "pfpb",
      grepl("HEALTHY BEVERAGES", COMPOSITE) ~ "milk_plantbev",
      grepl("UNSATURATED OILS", COMPOSITE)  ~ "ufa"
    )
  ) |>
  # drop the unclassified rows (non-hefi food groups, i.e., subclassification)
  filter(!is.na(hefi2019subgrp)) |>
  # long to wide: one column per HEFI-2019 category, one row per drig
  select(-COMPOSITE) |>
  pivot_wider(names_from = "hefi2019subgrp", values_from = "NB_RA") |>
  # add hefi-2019 nutrients
  full_join(drig_nut_all, by = "drig") |>
  # lower-case variable names
  janitor::clean_names()
mutate: new variable 'hefi2019subgrp' (character) with 7 unique values and 63% NA
filter: removed 41 rows (63%), 24 rows remaining
select: dropped one variable (COMPOSITE)
pivot_wider: reorganized (NB_RA, hefi2019subgrp) into (vf, wg, pfpb, pfab, milk_plantbev, …) [was 24x3, now 4x7]
full_join: added 7 columns (EKC, FSUG, FAS, FAM, FAP, …)
           > rows only in pivot_wider(select(filt..  0
           > rows only in drig_nut_all               0
           > matched rows                            4
           >                                        ===
           > rows total                              4
# Generate an 'overall' row: unweighted mean across the DRI-group rows of
# every column except the first one (drig), then flag the row with drig = 0
dietsim_drig0 <-
  dietsim_bydrig |>
  summarise(
    across(.cols = all_of(names(dietsim_bydrig)[-1]), .fns = mean)
  ) |>
  mutate(drig = 0)
summarise: now one row and 13 columns, ungrouped
mutate: new variable 'drig' (double) with one unique value and 0% NA
# Quick check of the 'overall' row: dimensions, then content
dim(dietsim_drig0)
head(dietsim_drig0)
[1]  1 14
# A tibble: 1 × 14
     vf    wg  pfpb  pfab milk_plantbev   ufa   ekc  fsug   fas   fam   fap
  <dbl> <dbl> <dbl> <dbl>         <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1  9.03  3.92  2.04  1.88             1     1 1865.  6.37  10.2  21.5  20.1
# ℹ 3 more variables: sod <dbl>, pro <dbl>, drig <dbl>
# Add 'overall' row to drig-specific rows, sort by drig, and add a labelled
# factor version of the DRI group id
dietsim <-
  arrange(rbind(dietsim_bydrig, dietsim_drig0), drig) |>
  mutate(
    drig_f = factor(
      drig,
      levels = c(0, 12, 13, 14, 15),
      labels = c(
        "Male and female, 51 y or older",
        "Male, 51-70 y",
        "Female, 51-70 y",
        "Male, 71y+",
        "Female, 71y+"
      )
    )
  )
mutate: new variable 'drig_f' (factor) with 5 unique values and 0% NA
# Quick check of the combined data: dimensions, column names, first rows
dim(dietsim)
names(dietsim)
head(dietsim)
[1]  5 15
 [1] "drig"          "vf"            "wg"            "pfpb"         
 [5] "pfab"          "milk_plantbev" "ufa"           "ekc"          
 [9] "fsug"          "fas"           "fam"           "fap"          
[13] "sod"           "pro"           "drig_f"       
# A tibble: 5 × 15
   drig    vf    wg  pfpb  pfab milk_plantbev   ufa   ekc  fsug   fas   fam
  <dbl> <dbl> <dbl> <dbl> <dbl>         <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     0  9.03  3.92  2.04  1.88             1     1 1865.  6.37 10.2   21.5
2    12  9.93  5     2.42  2.29             1     1 2259.  7.10 11.7   24.4
3    13  8.56  3.5   2     1.85             1     1 1737.  5.52  9.74  20.8
4    14  9.93  4.2   2     1.84             1     1 1959.  6.98 10.6   22.3
5    15  7.71  3     1.72  1.56             1     1 1507.  5.87  8.71  18.5
# ℹ 4 more variables: fap <dbl>, sod <dbl>, pro <dbl>, drig_f <fct>
# ************************** #
#         Add labels         #
# ************************** #

# Variable labels are matched to columns by name (named list)
labelled::var_label(dietsim) <-
  as.list(c(
    # identifiers
    drig          = "DRI age and sex group",
    drig_f        = "DRI age and sex group",
    # food categories
    vf            = "Vegetables and fruits, RA/d",
    wg            = "Whole-grain foods, RA/d",
    pfpb          = "Protein foods, plant-based, RA/d",
    pfab          = "Protein foods, animal-based, RA/d",
    milk_plantbev = "Unsweetened milk and plant beverage with protein, RA/d",
    ufa           = "Unsaturated fats, RA/d",
    # energy and nutrients
    ekc           = "Total energy intake, kcal/d",
    fsug          = "Free sugars, grams/d",
    fas           = "Saturated fats, grams/d",
    fam           = "Monounsaturated fats, grams/d",
    fap           = "Polyunsaturated fats, grams/d",
    sod           = "Sodium, mg/d",
    pro           = "Protein, grams/d"
  ))
#### Calculate HEFI-2019 scores ####
# ********************************************** #
#           Calculate HEFI-2019 scores           #
# ********************************************** #

# Apply the hefi-2019 scoring algorithm
 
# Score each row of `dietsim` with the HEFI-2019 algorithm. Food categories
# that have no counterpart in the simulated diets are passed as 0.
dietsim_hefi <-
  dietsim |>
  mutate(
    # Change milk_plantbev RA to grams
    # NOTE(review): 258 is presumably grams per RA - confirm the source
    milk_plantbev = milk_plantbev * 258,
    # Without further information for milk_plantbev = split half/half
    milk          = milk_plantbev / 2,
    plantbev      = milk_plantbev / 2
  ) |>
  hefi2019::hefi2019(
    # foods
    vegfruits          = vf,
    wholegrfoods       = wg,
    nonwholegrfoods    = 0,
    profoodsanimal     = pfab,
    profoodsplant      = pfpb,
    otherfoods         = 0,
    # beverages
    waterhealthybev    = 0,
    unsweetmilk        = milk,
    unsweetplantbevpro = plantbev,
    otherbeverages     = 0,
    # nutrients and energy
    mufat              = fam,
    pufat              = fap,
    satfat             = fas,
    freesugars         = fsug,
    sodium             = sod,
    energy             = ekc
  )
Healthy Eating Food Index-2019 Scoring Algorithm R version 1.4
mutate: changed 5 values (100%) of 'milk_plantbev' (0 new NAs)
        new variable 'milk' (double) with one unique value and 0% NA
        new variable 'plantbev' (double) with one unique value and 0% NA
# ************************** #
#         Add labels         #
# ************************** #

# Variable labels for the scored data, matched to columns by name.
# Unlike in `dietsim`, `milk_plantbev` was converted from RA to grams
# (multiplied by 258) before scoring, so its label reports grams/d here.
labelled::var_label(dietsim_hefi) <-
  list(
    drig          = "DRI age and sex group",
    drig_f        = "DRI age and sex group",
    vf            = "Vegetables and fruits, RA/d",
    wg            = "Whole-grain foods, RA/d",
    pfpb          = "Protein foods, plant-based, RA/d",
    pfab          = "Protein foods, animal-based, RA/d",
    milk_plantbev = "Unsweetened milk and plant beverage with protein, grams/d",
    ufa           = "Unsaturated fats, RA/d",
    ekc           = "Total energy intake, kcal/d",
    fsug          = "Free sugars, grams/d",
    fas           = "Saturated fats, grams/d",
    fam           = "Monounsaturated fats, grams/d",
    fap           = "Polyunsaturated fats, grams/d",
    sod           = "Sodium, mg/d",
    pro           = "Protein, grams/d")
#### Summary and save ####
# ********************************************** #
#                Summary and save                #
# ********************************************** #

# summary
# Table of food-category intakes: the numeric group id and the columns from
# ekc to pro are dropped, the group label is shown first
dietsim |>
  select(!c("drig", "ekc":"pro")) |>
  gt::gt() |>
  gt::tab_header("Intakes of major food categories in Health Canada simulated diets, by DRI group") |>
  gt::tab_source_note(source_note = "Data from Health Canada. (2022). Simulated composite diets. Available at https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be") |>
  gt::fmt_number(columns = everything(), decimals = 1) |>
  gt::cols_move_to_start("drig_f")
select: dropped 8 variables (drig, ekc, fsug, fas, fam, …)
Intakes of major food categories in Health Canada simulated diets, by DRI group
DRI age and sex group Vegetables and fruits, RA/d Whole-grain foods, RA/d Protein foods, plant-based, RA/d Protein foods, animal-based, RA/d Unsweetened milk and plant beverage with protein, RA/d Unsaturated fats, RA/d
Male and female, 51 y or older 9.0 3.9 2.0 1.9 1.0 1.0
Male, 51-70 y 9.9 5.0 2.4 2.3 1.0 1.0
Female, 51-70 y 8.6 3.5 2.0 1.9 1.0 1.0
Male, 71y+ 9.9 4.2 2.0 1.8 1.0 1.0
Female, 71y+ 7.7 3.0 1.7 1.6 1.0 1.0
Data from Health Canada. (2022). Simulated composite diets. Available at https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be
# Table of energy and nutrient intakes: group label plus the columns from
# ekc to pro
dietsim |>
  select("drig_f", "ekc":"pro") |>
  gt::gt() |>
  gt::tab_header("Intakes of major nutrients in Health Canada simulated diets, by DRI group") |>
  gt::tab_source_note(source_note = "Data from Health Canada. (2022). Simulated composite diets. Available at https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be") |>
  gt::fmt_number(columns = everything(), decimals = 1) |>
  gt::cols_move_to_start("drig_f")
select: dropped 7 variables (drig, vf, wg, pfpb, pfab, …)
Intakes of major nutrients in Health Canada simulated diets, by DRI group
DRI age and sex group Total energy intake, kcal/d Free sugars, grams/d Saturated fats, grams/d Monounsaturated fats, grams/d Polyunsaturated fats, grams/d Sodium, mg/d Protein, grams/d
Male and female, 51 y or older 1,865.3 6.4 10.2 21.5 20.1 1,065.8 97.6
Male, 51-70 y 2,258.8 7.1 11.7 24.4 20.8 1,174.3 118.7
Female, 51-70 y 1,736.7 5.5 9.7 20.8 20.5 992.5 91.7
Male, 71y+ 1,958.7 7.0 10.6 22.3 20.7 1,141.7 102.1
Female, 71y+ 1,507.0 5.9 8.7 18.5 18.4 954.5 77.9
Data from Health Canada. (2022). Simulated composite diets. Available at https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be
# Table of HEFI-2019 component and total scores. The group label (drig_f) is
# kept and shown first so that each row can be attributed to its DRI group;
# the table was previously printed without any row identifier.
dietsim_hefi |>
  select("drig_f", starts_with("HEFI2019")) |>
  gt::gt() |>
  gt::cols_move_to_start("drig_f") |>
  gt::fmt_number(columns = everything(), decimals = 1) |>
  gt::tab_header("HEFI-2019 scores of Health Canada simulated diets, by DRI group") |>
  gt::tab_source_note(source_note = "Based on data from Health Canada. (2022). Simulated composite diets. Available at https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be")
select: dropped 28 variables (drig, vf, wg, pfpb, pfab, …)
HEFI-2019 scores of Health Canada simulated diets, by DRI group
HEFI2019 C1 Vegetables and fruits HEFI2019 C2 Whole-grain foods HEFI2019 C3 Grain foods ratio HEFI2019 C4 Protein foods HEFI2019 C5 Plant-based protein foods HEFI2019 C6 Beverages HEFI2019 C7 Fatty acids ratio HEFI2019 C8 Saturated fats HEFI2019 C9 Free sugars HEFI2019 C10 Sodium Total Healthy Eating Food Index (/80)
20.0 4.4 5.0 5.0 4.1 10.0 5.0 5.0 10.0 10.0 78.5
19.2 4.8 5.0 5.0 4.2 10.0 5.0 5.0 10.0 10.0 78.3
20.0 4.1 5.0 5.0 4.1 10.0 5.0 5.0 10.0 10.0 78.3
20.0 4.4 5.0 5.0 4.1 10.0 5.0 5.0 10.0 10.0 78.6
20.0 4.0 5.0 5.0 4.0 10.0 5.0 5.0 10.0 10.0 78.0
Based on data from Health Canada. (2022). Simulated composite diets. Available at https://open.canada.ca/data/dataset/0490749d-b0b0-410a-9577-a903c6cec2be
# save intake only (project helper sourced from scripts/save_and_summarize_data.R)
save_and_summarize_data(dietsim, dir = dir_processed, dir_metadata = dir_metadata)
Warning in fun(libname, pkgname): couldn't connect to display
"/private/tmp/com.apple.launchd.6tu9LhZbap/org.xquartz:0"
system might not have X11 capabilities; in case of errors when using dfSummary(), set st_options(use.x11 = FALSE)
Output file written: /Users/DidierBrassard/RStudio/NuAge_protocol/data/metadata/dietsim.html
# save intake + hefi (project helper sourced from scripts/save_and_summarize_data.R)
save_and_summarize_data(dietsim_hefi, dir = dir_processed, dir_metadata = dir_metadata)
Output file written: /Users/DidierBrassard/RStudio/NuAge_protocol/data/metadata/dietsim_hefi.html
# *********************************************************************** #
#                              End of code                                #
# *********************************************************************** #

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.7.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Toronto
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] summarytools_1.0.1 here_1.0.1         gt_0.11.1          hefi2019_0.0.1.4  
[5] janitor_2.2.0      readxl_1.4.3       tidylog_1.1.0      dplyr_1.1.4       

loaded via a namespace (and not attached):
 [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    tidyr_1.3.1      
 [5] tcltk_4.3.1       xml2_1.3.6        stringi_1.8.4     rematch_2.0.0    
 [9] hms_1.1.3         digest_0.6.37     magrittr_2.0.3    evaluate_1.0.1   
[13] timechange_0.3.0  fastmap_1.2.0     plyr_1.8.9        cellranger_1.1.0 
[17] rprojroot_2.0.4   jsonlite_1.8.9    rapportools_1.1   backports_1.5.0  
[21] pander_0.6.5      purrr_1.0.2       fansi_1.0.6       codetools_0.2-20 
[25] cli_3.6.3         labelled_2.13.0   rlang_1.1.4       base64enc_0.1-3  
[29] withr_3.0.2       yaml_2.3.10       tools_4.3.1       reshape2_1.4.4   
[33] pryr_0.1.6        checkmate_2.3.2   forcats_1.0.0     vctrs_0.6.5      
[37] R6_2.5.1          matrixStats_1.4.1 magick_2.8.5      lifecycle_1.0.4  
[41] lubridate_1.9.3   snakecase_0.11.1  stringr_1.5.1     htmlwidgets_1.6.4
[45] MASS_7.3-60.0.1   clisymbols_1.2.0  pkgconfig_2.0.3   pillar_1.9.0     
[49] Rcpp_1.0.13-1     glue_1.8.0        haven_2.5.4       xfun_0.49        
[53] tibble_3.2.1      tidyselect_1.2.1  rstudioapi_0.17.1 knitr_1.49       
[57] htmltools_0.5.8.1 rmarkdown_2.29    compiler_4.3.1