MultimodalHealthGenerator

library(healthsimulation)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: R6
## 
## Loading required package: corrplot
## 
## corrplot 0.95 loaded
## 
## Loading required package: viridis
## 
## Loading required package: viridisLite
## 
## Loading required package: ggwordcloud
# Example usage and demonstration
cat("=== MULTIMODAL HEALTH TIME SERIES GENERATOR R6 CLASS ===\n\n")
## === MULTIMODAL HEALTH TIME SERIES GENERATOR R6 CLASS ===
cat("Creating generator instance...\n")
## Creating generator instance...
# Create generator with seed
generator <- MultimodalHealthGenerator$new(seed = 42)
## MultimodalHealthGenerator initialized with seed: 42
# Generate data
health_data <- generator$generate(n_days = 100, start_date = "2024-01-01")
## Generating 100 days of multimodal health data starting from 2024-01-01 
## ✓ Generated dataset with dimensions: 100 × 19
# Display sample
cat("\nFirst 5 rows of generated data:\n")
## 
## First 5 rows of generated data:
print(head(health_data, 5))
##         date day_of_year day_of_week is_weekend month season hrv_ms
## 1 2024-01-01           1           1      FALSE     1 winter   60.1
## 2 2024-01-02           2           2      FALSE     1 winter   45.9
## 3 2024-01-03           3           3      FALSE     1 winter   50.8
## 4 2024-01-04           4           4      FALSE     1 winter   48.9
## 5 2024-01-05           5           5      FALSE     1 winter   44.6
##   resting_hr_bpm sleep_quality_score daily_steps air_quality_index
## 1             75                  52        9057               105
## 2             79                  74       10590                72
## 3             69                  86        8426                86
## 4             81                  96        7735                86
## 5             72                  53        5330                76
##   indoor_temp_c humidity_percent light_exposure_lux_hrs stress_level
## 1          20.5               41                    486          6.9
## 2          21.8               41                    342          5.1
## 3          23.0               37                    378          5.4
## 4          22.7               33                    330          5.2
## 5          19.7               25                    220          6.9
##   text_sentiment text_complexity_words health_keywords_count
## 1         -0.224                  25.0                     3
## 2          0.091                  18.6                     7
## 3          0.377                  22.7                     3
## 4          0.346                  21.6                     3
## 5         -0.061                  25.0                     2
##   daily_wellness_score
## 1                 46.2
## 2                 51.3
## 3                 53.8
## 4                 54.5
## 5                 34.9
# Get summary statistics
cat("\nSummary statistics:\n")
## 
## Summary statistics:
summary_stats <- generator$get_summary_stats()
print(summary_stats)
## # A tibble: 14 × 5
##    variable                  mean      sd     min      max
##    <chr>                    <dbl>   <dbl>   <dbl>    <dbl>
##  1 month                     2.18    0.98    1        4   
##  2 hrv_ms                   55.4     9.84   25.4     73.5 
##  3 resting_hr_bpm           68.7     6.49   53       90   
##  4 sleep_quality_score      76.7    12.8    52      100   
##  5 daily_steps            7865.   1931.   3471    11506   
##  6 air_quality_index        79.6    15.1    49      129   
##  7 indoor_temp_c            21.3     1.67   18       25.4 
##  8 humidity_percent         41.4    12.1    25       78   
##  9 light_exposure_lux_hrs  356.    110.    100      614   
## 10 stress_level              6.7     1.5     2.5     10   
## 11 text_sentiment            0.05    0.21   -0.42     0.52
## 12 text_complexity_words    23.6     2.14   14.4     25   
## 13 health_keywords_count     3.81    2.09    0       10   
## 14 daily_wellness_score     50.6     6.87   34.4     67
# Get correlations with target
cat("\nTop correlations with wellness score:\n")
## 
## Top correlations with wellness score:
correlations <- generator$get_target_correlations()
print(head(correlations, 8))
##                                      Variable Correlation
## sleep_quality_score       sleep_quality_score       0.571
## text_sentiment                 text_sentiment       0.460
## hrv_ms                                 hrv_ms       0.433
## stress_level                     stress_level      -0.380
## daily_steps                       daily_steps       0.372
## light_exposure_lux_hrs light_exposure_lux_hrs       0.314
## month                                   month       0.311
## air_quality_index           air_quality_index      -0.290
# Analyze interdependencies
cat("\nInterdependency analysis:\n")
## 
## Interdependency analysis:
interdep <- generator$analyze_interdependencies()
print(interdep$stress_effects)
## # A tibble: 4 × 7
##   stress_quartile avg_wellness avg_sleep avg_hrv avg_sentiment avg_keywords
##             <int>        <dbl>     <dbl>   <dbl>         <dbl>        <dbl>
## 1               1         53.6      79.6    52.8         0.251          3.4
## 2               2         51.8      79.9    55           0.091          3.8
## 3               3         50        73.6    56.4         0.016          3.5
## 4               4         47.1      73.7    57.5        -0.163          4.5
## # ℹ 1 more variable: stress_level_category <chr>
cat("\n=== GENERATOR FEATURES ===\n")
## 
## === GENERATOR FEATURES ===
cat("✓ R6 class with clean OOP interface\n")
## ✓ R6 class with clean OOP interface
cat("✓ Seed-based reproducible generation\n")
## ✓ Seed-based reproducible generation
cat("✓ Comprehensive documentation\n")
## ✓ Comprehensive documentation
cat("✓ Built-in analysis methods\n")
## ✓ Built-in analysis methods
cat("✓ Visualization capabilities\n")
## ✓ Visualization capabilities
cat("✓ Data export functionality\n")
## ✓ Data export functionality
cat("✓ Modular generation pipeline\n")
## ✓ Modular generation pipeline
cat("✓ Parameter validation\n")
## ✓ Parameter validation
cat("✓ Metadata tracking\n")
## ✓ Metadata tracking
generator$plot_time_series()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the healthsimulation package.
##   Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.