library(codemapper)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

all_lkps_maps_dummy <- build_all_lkps_maps_dummy()

Two formats: ‘ICD10_CODE’ and ‘ALT_CODE’

ICD10 codes may be recorded as either ‘ICD10_CODE’ or ‘ALT_CODE’ format. For example, ‘E10.9’ (Type 1 diabetes mellitus without complications) is recorded as ‘E109’ in ‘ALT_CODE’ format.

Both formats are provided in the ICD10 lookup table:

all_lkps_maps_dummy$icd10_lkp
#> # A tibble: 197 × 13
#>    .rowid ICD10_CODE ALT_CODE USAGE   USAGE_UK DESCRIPTION MODIFIER_4 MODIFIER_5
#>     <int> <chr>      <chr>    <chr>   <chr>    <chr>       <chr>      <chr>     
#>  1      1 A00        A00      DEFAULT 3        Cholera     NA         NA        
#>  2      2 A00.0      A000     DEFAULT 3        Cholera du… NA         NA        
#>  3      3 A00.1      A001     DEFAULT 3        Cholera du… NA         NA        
#>  4      4 A00.9      A009     DEFAULT 3        Cholera, u… NA         NA        
#>  5      5 A01.0      A010     DEFAULT 3        Typhoid fe… NA         NA        
#>  6      6 A02        A02      DEFAULT 3        Other salm… NA         NA        
#>  7      7 A02.0      A020     DEFAULT 3        Salmonella… NA         NA        
#>  8      8 A02.1      A021     DEFAULT 3        Salmonella… NA         NA        
#>  9      9 A02.2      A022     DEFAULT 3        Localized … NA         NA        
#> 10     10 A02.8      A028     DEFAULT 3        Other spec… NA         NA        
#> # … with 187 more rows, and 5 more variables: QUALIFIERS <chr>,
#> #   GENDER_MASK <chr>, MIN_AGE <chr>, MAX_AGE <chr>, TREE_DESCRIPTION <chr>

However, only the ‘ALT_CODE’ format is used in all other mapping tables that include ICD10 codes:

all_lkps_maps_dummy$icd9_icd10 %>% 
  select(ICD9,
         ICD10,
         DESCRIPTION_ICD10)
#> # A tibble: 47 × 3
#>    ICD9  ICD10 DESCRIPTION_ICD10                                           
#>    <chr> <chr> <chr>                                                       
#>  1 0020  A010  Typhoid fever                                               
#>  2 0341  A38X  Scarlet fever                                               
#>  3 218   D250  Submucous leiomyoma of uterus                               
#>  4 218   D251  Intramural leiomyoma of uterus                              
#>  5 218   D252  Subserosal leiomyoma of uterus                              
#>  6 218   D259  Leiomyoma of uterus, unspecified                            
#>  7 2890  D751  Secondary polycythaemia                                     
#>  8 NA    E100  Insulin-dependent diabetes mellitus with coma               
#>  9 NA    E101  Insulin-dependent diabetes mellitus with ketoacidosis       
#> 10 NA    E102  Insulin-dependent diabetes mellitus with renal complications
#> # … with 37 more rows

Searching for ICD10 codes in ‘ICD10_CODE’ format will yield no results:

# ICD10_CODE format - no codes found
lookup_codes(codes = "E10.9",
             code_type = "icd10",
             all_lkps_maps = all_lkps_maps_dummy, 
             unrecognised_codes = "warning")
#> Warning in handle_unrecognised_codes(unrecognised_codes = unrecognised_codes, :
#> The following 1 codes were not found for 'icd10' in table 'icd10_lkp': 'E10.9'
#> No matching codes found. Returning `NULL`
#> NULL

# ALT_CODE format - now recognised
lookup_codes(codes = "E109",
             code_type = "icd10",
             all_lkps_maps = all_lkps_maps_dummy, 
             unrecognised_codes = "warning")
#> # A tibble: 1 × 3
#>   code  description                                    code_type
#>   <chr> <chr>                                          <chr>    
#> 1 E109  Type 1 diabetes mellitus Without complications icd10

Use reformat_icd10_codes() to convert from one format to the other:

reformat_icd10_codes(icd10_codes = c("E10.9"),
                     all_lkps_maps = all_lkps_maps_dummy, 
                     input_icd10_format = "ICD10_CODE",
                     output_icd10_format = "ALT_CODE") %>% 
  lookup_codes(code_type = "icd10",
               all_lkps_maps = all_lkps_maps_dummy)
#> # A tibble: 1 × 3
#>   code  description                                    code_type
#>   <chr> <chr>                                          <chr>    
#> 1 E109  Type 1 diabetes mellitus Without complications icd10

While for many ICD10 codes simply removing the ‘.’ will correctly convert from ‘ICD10_CODE’ format to ‘ALT_CODE’ format, this approach will not work in the following cases:

  1. Undivided 3 character ICD10 codes, which have an appended ‘X’ character in ‘ALT_CODE’ format. For example, ‘I10’ for Essential (primary) hypertension is recorded as ‘I10X’.

    reformat_icd10_codes(icd10_codes = c("I10"),
                         all_lkps_maps = all_lkps_maps_dummy, 
                         input_icd10_format = "ICD10_CODE",
                         output_icd10_format = "ALT_CODE") %>% 
      lookup_codes(code_type = "icd10",
                   all_lkps_maps = all_lkps_maps_dummy)
    #> # A tibble: 1 × 3
    #>   code  description                      code_type
    #>   <chr> <chr>                            <chr>    
    #> 1 I10X  Essential (primary) hypertension icd10
  2. ICD10 codes with a 5th character modifier. In these cases, a single code in ‘ICD10_CODE’ format will map to multiple codes in ‘ALT_CODE’ format and vice versa.

    # ICD10_CODE to ALT_CODE
    m90_alt_code <- reformat_icd10_codes(icd10_codes = c("M90.0"),
                         all_lkps_maps = all_lkps_maps_dummy, 
                         input_icd10_format = "ICD10_CODE",
                         output_icd10_format = "ALT_CODE") %>% 
      lookup_codes(code_type = "icd10",
                   all_lkps_maps = all_lkps_maps_dummy)
    #> The following 1 input ICD10 codes do not have a 1-to-1 ICD10_CODE-to-ALT_CODE mapping: 'M90.0'. There will therefore be *more* output than input codes
    
    m90_alt_code
    #> # A tibble: 11 × 3
    #>    code  description                                  code_type
    #>    <chr> <chr>                                        <chr>    
    #>  1 M900  Tuberculosis of bone                         icd10    
    #>  2 M9000 Tuberculosis of bone Multiple sites          icd10    
    #>  3 M9001 Tuberculosis of bone Shoulder region         icd10    
    #>  4 M9002 Tuberculosis of bone Upper arm               icd10    
    #>  5 M9003 Tuberculosis of bone Forearm                 icd10    
    #>  6 M9004 Tuberculosis of bone Hand                    icd10    
    #>  7 M9005 Tuberculosis of bone Pelvic region and thigh icd10    
    #>  8 M9006 Tuberculosis of bone Lower leg               icd10    
    #>  9 M9007 Tuberculosis of bone Ankle and foot          icd10    
    #> 10 M9008 Tuberculosis of bone Other                   icd10    
    #> 11 M9009 Tuberculosis of bone Site unspecified        icd10
    
    # ALT_CODE to ICD10_CODE
    reformat_icd10_codes(
      icd10_codes = m90_alt_code$code,
      all_lkps_maps = all_lkps_maps_dummy,
      input_icd10_format = "ALT_CODE",
      output_icd10_format = "ICD10_CODE"
    )
    #> The following 11 input ICD10 codes do not have a 1-to-1 ICD10_CODE-to-ALT_CODE mapping: 'M900', 'M9000', 'M9001', 'M9002', 'M9003', 'M9004', 'M9005', 'M9006', 'M9007', 'M9008', 'M9009'. There will therefore be *fewer* output than input codes
    #> [1] "M90.0"

UK Biobank

UK Biobank data also records ICD10 codes in ‘ALT_CODE’ format, with the exception of undivided 3 character ICD10 codes, where no ‘X’ is appended. For example, ‘I10X’ for Essential (primary) hypertension is recorded as ‘I10’ in UK Biobank.