library(bipolarPreprocessing)
library(dplyr)
# Load the three sample datasets through the package's get_sample_* helpers.
visits <- get_sample_visits()
mobile_recordings <- get_sample_mobile_recordings()
mobile_chunks <- get_sample_mobile_chunks()
Check mobile recordings dataset by name (audio file):
# Collect every `name` value that occurs more than once in mobile_recordings.
recordings_duplicates <- mobile_recordings %>%
  count(name) %>%
  arrange(-n) %>%
  filter(n > 1) %>%
  pull(name)

# Show the rows whose name is among the duplicated ones.
mobile_recordings %>% filter(name %in% recordings_duplicates)
#> # A tibble: 0 × 29
#> # … with 29 variables: patient_id <dbl>, dw_mobilerecording_id <dbl>,
#> # st_mobilerecording_id <dbl>, dw_patient_id <dbl>, st_patient_id <dbl>,
#> # create_date_date_id <dbl>, create_date_time_id <dbl>, name <chr>,
#> # header <chr>, duration <dbl>, is_analyzed <lgl>, is_data_available <lgl>,
#> # is_outgoing_call <lgl>, chunks_count <dbl>, successfully_recorded <lgl>,
#> # data <lgl>, is_synced <lgl>, sync_date_date_id <dbl>,
#> # sync_date_time_id <dbl>, missing_fields_flag <lgl>, …
Check chunks by chunk id and frame no:
# Count rows per (chunk id, frame number) pair and keep pairs seen more than
# once.
chunks_duplicates <- mobile_chunks %>%
  count(dw_mobilerecordingchunk_id, frame_nr) %>%
  arrange(-n) %>%
  filter(n > 1)

# If any duplicated pairs exist, list the matching chunk rows in key order.
if (nrow(chunks_duplicates) > 0) {
  mobile_chunks %>%
    # Join explicitly on the two counted keys instead of relying on an
    # implicit natural join over all shared column names.
    inner_join(
      chunks_duplicates,
      by = c("dw_mobilerecordingchunk_id", "frame_nr")
    ) %>%
    arrange(dw_mobilerecordingchunk_id, frame_nr)
}
No duplicates in our data. If there were any we should filter them out.
Data collected before 2018 is unreliable due to a bug in the library that was used to collect all the parameters of a call. We’ll work only with data from 2018 onward.
# Keep only rows dated on or after 2018-01-01 in both datasets.
visits <- visits %>% filter(visit_date >= "2018-01-01")
mobile_recordings <- mobile_recordings %>% filter(create_date >= "2018-01-01")
We don’t filter `mobile_chunks`, as it’s not clear when the data was recorded. Instead, we’ll use the corresponding rows from the `mobile_recordings` dataset to get only valid rows by joining the datasets together.
See the distribution of `mobile_recordings$chunks_count` values:
# Summary statistics of the chunks_count column.
mobile_recordings$chunks_count %>% summary()
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 1.0 1.0 2.0 2.3 3.5 5.0
Recordings with more than 10 chunks are unreliable.
# Keep recordings with at most 10 chunks.
mobile_recordings_filtered <- mobile_recordings %>% filter(chunks_count <= 10)

# Ids (with their chunk counts) of the recordings excluded by the filter above.
unreliable_rec_id <- mobile_recordings %>%
  filter(chunks_count > 10) %>%
  select(dw_mobilerecording_id, chunks_count)
unreliable_rec_id
#> # A tibble: 0 × 2
#> # … with 2 variables: dw_mobilerecording_id <dbl>, chunks_count <dbl>
# Names of the call-parameter columns to summarise.
sample_call_parameters <- get_sample_call_parameters()

# For each recording (dw_mobilerecording_id), nest its chunk rows and compute
# the mean and sd of every column listed in sample_call_parameters. The
# results are stored in a list-column named `stats`.
stats <- mobile_chunks %>%
  tidyr::nest(data = -dw_mobilerecording_id) %>%
  mutate(stats = lapply(data, function(chunk_rows) {
    chunk_rows %>%
      select(all_of(sample_call_parameters)) %>%
      # all_of() makes it explicit that sample_call_parameters is an external
      # character vector rather than a column name.
      summarise(
        across(all_of(sample_call_parameters), list(mean = mean, sd = sd))
      )
  }))

# Flatten the per-recording summaries into one wide table: the recording id,
# then all *mean columns, then all *sd columns.
stats <- stats %>%
  select(dw_mobilerecording_id, stats) %>%
  tidyr::unnest(cols = c(stats)) %>%
  select(dw_mobilerecording_id, ends_with("mean"), ends_with("sd"))
stats
#> # A tibble: 10 × 173
#> dw_mobilere…¹ pcm_L…² pcm_z…³ voice…⁴ f0_sm…⁵ f0env…⁶ pcm_f…⁷ pcm_f…⁸ pcm_f…⁹
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21987 -20.6 0.222 0.541 159. 189. 1.57e-9 4.04e-7 891.
#> 2 21927 -14.0 0.398 0.239 0 0 1.34e-6 1.27e-5 1612.
#> 3 21929 -20.5 0.351 0.221 0 0 7.94e-9 1.10e-7 1535.
#> 4 21940 -19.0 0.335 0.275 7.20 3.56 1.04e-7 1.75e-6 1640.
#> 5 21981 -17.9 0.147 0.458 75.1 138. 5.05e-6 1.91e-5 828.
#> 6 21991 -21.1 0.216 0.200 0 0 5.43e-8 1.68e-7 856.
#> 7 21911 -19.9 0.229 0.371 31.0 134. 1.25e-6 4.68e-6 801.
#> 8 21913 -16.2 0.363 0.298 3.33 25.8 1.40e-6 4.00e-5 1921.
#> 9 21918 -13.2 0.379 0.362 37.1 128. 1.29e-6 1.01e-4 1771.
#> 10 21922 -20.1 0.372 0.214 0 0 6.04e-9 1.51e-7 1603.
#> # … with 164 more variables: pcm_fftMag_spectralRollOff50_0_sma_mean <dbl>,
#> # pcm_fftMag_spectralRollOff75_0_sma_mean <dbl>,
#> # pcm_fftMag_spectralRollOff90_0_sma_mean <dbl>,
#> # pcm_fftmag_spectralflux_sma_mean <dbl>,
#> # pcm_fftmag_spectralcentroid_sma_mean <dbl>,
#> # pcm_fftmag_spectralmaxpos_sma_mean <dbl>,
#> # pcm_fftmag_spectralminpos_sma_mean <dbl>, f0final_sma_mean <dbl>, …
We calculated statistics like this (only the first 16 column names are shown here):
#> [1] "F0semitoneFrom27_5Hz_sma3nz_mean" "F0semitoneFrom27_5Hz_sma3nz_sd"
#> [3] "alpharatio_sma3_mean" "alpharatio_sma3_sd"
#> [5] "audSpec_Rfilt_sma_compare_0__mean" "audSpec_Rfilt_sma_compare_0__sd"
#> [7] "audSpec_Rfilt_sma_compare_10__mean" "audSpec_Rfilt_sma_compare_10__sd"
#> [9] "audSpec_Rfilt_sma_compare_11__mean" "audSpec_Rfilt_sma_compare_11__sd"
#> [11] "audSpec_Rfilt_sma_compare_12__mean" "audSpec_Rfilt_sma_compare_12__sd"
#> [13] "audSpec_Rfilt_sma_compare_13__mean" "audSpec_Rfilt_sma_compare_13__sd"
#> [15] "audSpec_Rfilt_sma_compare_14__mean" "audSpec_Rfilt_sma_compare_14__sd"
For example, the statistics for the variable `f1frequency_sma3nz`:
#> # A tibble: 10 × 2
#> f1frequency_sma3nz_mean f1frequency_sma3nz_sd
#> <dbl> <dbl>
#> 1 644. 213.
#> 2 782. 252.
#> 3 865. 302.
#> 4 738. 275.
#> 5 647. 224.
#> 6 622. 238.
#> 7 644. 285.
#> 8 899. 254.
#> 9 873. 245.
#> 10 807. 324.
Transformation functions are used to re-code states into new values, like ‘healthy/unhealthy’ instead of ‘euthymia/depression’.
# Apply each label transformation to `visits`, passing 'hamd_ymrs' as the
# second argument, and keep the three results side by side.
visits_label_hu <- transform_label_healthy_unhealthy(visits, 'hamd_ymrs')
visits_label_custom <- transform_label_custom(visits, 'hamd_ymrs')
visits_label_cgi <- transform_label_cgi(visits, 'hamd_ymrs')