Skip to contents

Filters a clinical events table created by tidy_clinical_events for a set of clinical codes that represent one or more phenotypes.

Usage

extract_phenotypes(
  clinical_events,
  clinical_codes,
  source_filter = NULL,
  verbose = TRUE
)

Arguments

clinical_events

A long format data frame created by tidy_clinical_events, tidy_gp_clinical, tidy_gp_scripts or make_clinical_events_db. This can also be a tbl_dbi object.

clinical_codes

A data frame of clinical codes. Must match the format of the data frame returned by example_clinical_codes().

source_filter

Character vector of data sources to filter for (optional).

verbose

If TRUE (default), display progress messages.

Value

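A data frame of the clinical events that match clinical_codes, joined with the corresponding columns from the clinical code list (see the example output below).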

Examples

library(magrittr)

# dummy clinical events data frame
dummy_ukb_data_dict <- get_ukb_dummy("dummy_Data_Dictionary_Showcase.tsv")
dummy_ukb_codings <- get_ukb_dummy("dummy_Codings.tsv")

dummy_clinical_events <- read_ukb(
  path = get_ukb_dummy("dummy_ukb_main.tsv", path_only = TRUE),
  ukb_data_dict = dummy_ukb_data_dict,
  ukb_codings = dummy_ukb_codings
) %>%
  tidy_clinical_events(
    ukb_data_dict = dummy_ukb_data_dict,
    ukb_codings = dummy_ukb_codings
  ) %>%
  dplyr::bind_rows()
#> Creating data dictionary
#> STEP 1 of 3
#> Reading data into R
#> STEP 2 of 3
#> Renaming with descriptive column names
#> STEP 3 of 3
#> Applying variable and value labels
#> Labelling dataset
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for primary_death_icd10
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for secondary_death_icd10
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for self_report_medication
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for self_report_non_cancer
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for self_report_non_cancer_icd10
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for self_report_cancer
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for self_report_operation
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for cancer_register_icd9
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for cancer_register_icd10
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for summary_hes_icd9
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for summary_hes_icd10
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for summary_hes_opcs3
#> Time taken: 0 minutes, 0 seconds.
#> Tidying clinical events for summary_hes_opcs4
#> Time taken: 0 minutes, 0 seconds.

head(dummy_clinical_events)
#>      eid source  index   code       date
#>    <int> <char> <char> <char>     <char>
#> 1:     1 f40001    0_0   X095 1917-10-08
#> 2:     2 f40001    0_0   A162 1955-02-11
#> 3:     1 f40001    1_0   X095 1910-02-19
#> 4:     2 f40001    1_0   A162 1965-08-08
#> 5:     1 f40002    0_0   W192 1917-10-08
#> 6:     2 f40002    0_0   V374 1955-02-11

# dummy clinical code list
example_clinical_codes()
#> # A tibble: 8 × 6
#>   disease  description                           category code_type code  author
#>   <chr>    <chr>                                 <chr>    <chr>     <chr> <chr> 
#> 1 Diabetes diabetes                              Diabete… data_cod… 1220  ukbwr 
#> 2 Diabetes gestational diabetes                  Gestati… data_cod… 1221  ukbwr 
#> 3 Diabetes type 1 diabetes                       Type 1 … data_cod… 1222  ukbwr 
#> 4 Diabetes type 2 diabetes                       Type 2 … data_cod… 1223  ukbwr 
#> 5 Diabetes Type 1 diabetes mellitus              Type 1 … icd10     E10   ukbwr 
#> 6 Diabetes Type 2 diabetes mellitus              Type 2 … icd10     E11   ukbwr 
#> 7 Diabetes Insulin dependent diabetes mellitus   Type 1 … read2     C108. ukbwr 
#> 8 Diabetes Non-insulin dependent diabetes melli… Type 2 … read2     C109. ukbwr 

# Filter for participants with matching clinical codes
extract_phenotypes(
  clinical_events = dummy_clinical_events,
  clinical_codes = example_clinical_codes()
)
#> Filtering for requested clinical codes/sources
#> Joining filtered events with clinical codelist
#> Time taken: 0 minutes, 0 seconds.
#>      eid       source  index   code       date     code_type  disease  category
#>    <int>       <char> <char> <char>     <char>        <char>   <char>    <char>
#> 1:     1       f20002    0_3   1223 2003-02-25 data_coding_6 Diabetes Type 2 DM
#> 2:     1 f20002_icd10    0_3    E11 2003-02-25         icd10 Diabetes Type 2 DM
#> 3:     2       f41270    0_0    E11 1939-02-16         icd10 Diabetes Type 2 DM
#> 4:     1       f41270    0_3    E10 1910-02-19         icd10 Diabetes Type 1 DM
#>    author
#>    <char>
#> 1:  ukbwr
#> 2:  ukbwr
#> 3:  ukbwr
#> 4:  ukbwr