<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v4i4e10436</article-id>
    <article-id pub-id-type="pmid">30545805</article-id>
    <article-id pub-id-type="doi">10.2196/10436</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Where No Universal Health Care Identifier Exists: Comparison and Determination of the Utility of Score-Based Persons Matching Algorithms Using Demographic Data</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Sanchez</surname>
          <given-names>Travis</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Dee</surname>
          <given-names>Jacob</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Rice</surname>
          <given-names>Brian</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Garcia Calleja</surname>
          <given-names>Jesus M</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Todd</surname>
          <given-names>Jim</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Waruru</surname>
        <given-names>Anthony</given-names>
      </name>
      <degrees>MPhil</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>Division of Global HIV and TB</institution>
        <institution>Centers for Disease Control and Prevention</institution>
        <addr-line>PO Box 606</addr-line>
        <addr-line>Nairobi, 00621</addr-line>
        <country>Kenya</country>
        <phone>254 722200179</phone>
        <email>awaruru@cdc.gov</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4281-6345</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Natukunda</surname>
          <given-names>Agnes</given-names>
        </name>
        <degrees>MSc</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1156-201X</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Nyagah</surname>
          <given-names>Lilly M</given-names>
        </name>
        <degrees>MBChB</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3852-6695</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Kellogg</surname>
          <given-names>Timothy A</given-names>
        </name>
        <degrees>MPH</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-0142-6508</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Zielinski-Gutierrez</surname>
          <given-names>Emily</given-names>
        </name>
        <degrees>Dr PH</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8412-0979</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6">
        <name name-style="western">
          <surname>Waruiru</surname>
          <given-names>Wanjiru</given-names>
        </name>
        <degrees>MBA, MPH</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8355-8004</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib7">
        <name name-style="western">
          <surname>Masamaro</surname>
          <given-names>Kenneth</given-names>
        </name>
        <degrees>MBChB, MSc</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3790-9037</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib8">
        <name name-style="western">
          <surname>Harklerode</surname>
          <given-names>Richelle</given-names>
        </name>
        <degrees>MPH</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7166-2325</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib9">
        <name name-style="western">
          <surname>Odhiambo</surname>
          <given-names>Jacob</given-names>
        </name>
        <degrees>MBChB</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4234-9740</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib10">
        <name name-style="western">
          <surname>Manders</surname>
          <given-names>Eric-Jan</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff6" ref-type="aff">6</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6027-0457</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib11">
        <name name-style="western">
          <surname>Young</surname>
          <given-names>Peter W</given-names>
        </name>
        <degrees>MPH</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7432-2876</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <label>1</label>
    <institution>Division of Global HIV and TB</institution>
    <institution>Centers for Disease Control and Prevention</institution>  
    <addr-line>Nairobi</addr-line>
    <country>Kenya</country></aff>
    <aff id="aff2">
    <label>2</label>
    <institution>Global Programs for Research and Training</institution>
    <institution>University of California San Francisco</institution>  
    <addr-line>San Francisco, CA</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
    <label>3</label>
    <institution>National AIDS and STI Control Program</institution>
    <institution>Ministry of Health</institution>  
    <addr-line>Nairobi</addr-line>
    <country>Kenya</country></aff>
    <aff id="aff4">
    <label>4</label>
    <institution>Institute for Global Health Sciences</institution>
    <institution>University of California San Francisco</institution>  
    <addr-line>San Francisco, CA</addr-line>
    <country>United States</country></aff>
    <aff id="aff5">
      <label>5</label>
      <institution>The Palladium Group</institution>
      <addr-line>Nairobi</addr-line>
      <country>Kenya</country>
    </aff>
    <aff id="aff6">
    <label>6</label>
    <institution>Division of Global HIV and TB</institution>
    <institution>Centers for Disease Control and Prevention</institution>  
    <addr-line>Atlanta, GA</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Anthony Waruru 
      <email>awaruru@cdc.gov</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Oct-Dec</season><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>13</day>
      <month>12</month>
      <year>2018</year>
    </pub-date>
    <volume>4</volume>
    <issue>4</issue>
    <elocation-id>e10436</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>16</day>
        <month>3</month>
        <year>2018</year>
      </date>
      <date date-type="rev-request">
        <day>7</day>
        <month>5</month>
        <year>2018</year>
      </date>
      <date date-type="rev-recd">
        <day>2</day>
        <month>7</month>
        <year>2018</year>
      </date>
      <date date-type="accepted">
        <day>16</day>
        <month>8</month>
        <year>2018</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Anthony Waruru, Agnes Natukunda, Lilly M Nyagah, Timothy A Kellogg, Emily Zielinski-Gutierrez, Wanjiru Waruiru, Kenneth Masamaro, Richelle Harklerode, Jacob Odhiambo, Eric-Jan Manders, Peter W Young. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 13.12.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://publichealth.jmir.org/2018/4/e10436/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>A universal health care identifier (UHID) facilitates the development of longitudinal medical records in health care settings where follow up and tracking of persons across health care sectors are needed. HIV case-based surveillance (CBS) entails longitudinal follow up of HIV cases from diagnosis, linkage to care and treatment, and is recommended for second generation HIV surveillance. In the absence of a UHID, records matching, linking, and deduplication may be done using score-based persons matching algorithms. We present a stepwise process of score-based persons matching algorithms based on demographic data to improve HIV CBS and other longitudinal data systems.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The aim of this study is to compare deterministic and score-based persons matching algorithms in records linkage and matching using demographic data in settings without a UHID.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We used HIV CBS pilot data from 124 facilities in 2 high HIV-burden counties (Siaya and Kisumu) in western Kenya. For efficient processing, data were grouped into 3 scenarios: (1) within HIV testing services (HTS), (2) HTS-care, and (3) within care. In deterministic matching, we directly compared identifiers and pseudo-identifiers from medical records to determine matches. We used the R stringdist package for the Jaro and Jaro-Winkler score-based matching methods and for the Levenshtein and Damerau-Levenshtein string edit distance calculation methods. For the Jaro-Winkler method, we used a penalty (p)=0.1 and applied 4 weights (ω) to Levenshtein and Damerau-Levenshtein: deletion ω=0.8, insertion ω=0.8, substitutions ω=1, and transposition ω=0.5.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>We abstracted 12,157 cases of which 4073/12,157 (33.5%) were from HTS, 1091/12,157 (9.0%) from HTS-care, and 6993/12,157 (57.5%) within care. Using the deterministic process 435/12,157 (3.6%) duplicate records were identified, yielding 96.4% (11,722/12,157) unique cases. Overall, of the score-based methods, Jaro-Winkler yielded the most duplicate records (686/12,157, 5.6%) while Jaro yielded the least duplicates (546/12,157, 4.5%), and Levenshtein and Damerau-Levenshtein yielded 4.6% (563/12,157) duplicates. Specifically, duplicate records yielded by method were: (1) Jaro 5.7% (234/4073) within HTS, 0.4% (4/1091) in HTS-care, and 4.4% (308/6993) within care, (2) Jaro-Winkler 7.4% (302/4073) within HTS, 0.5% (6/1091) in HTS-care, and 5.4% (378/6993) within care, (3) Levenshtein 6.4% (262/4073) within HTS, 0.4% (4/1091) in HTS-care, and 4.2% (297/6993) within care, and (4) Damerau-Levenshtein 6.4% (262/4073) within HTS, 0.4% (4/1091) in HTS-care, and 4.2% (297/6993) within care.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>Without deduplication, overreporting occurs across the care and treatment cascade. Jaro-Winkler score-based matching performed the best in identifying matches. A pragmatic estimate of duplicates in health care settings can provide a corrective factor for modeled estimates, for targeting and program planning. We propose that even without a UHID, a standard national deduplication and persons-matching algorithm that utilizes demographic data would improve accuracy in monitoring HIV care clinical cascades.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>deterministic matching</kwd>
      <kwd>score-based matching</kwd>
      <kwd>HIV case-based surveillance</kwd>
      <kwd>unique case identification</kwd>
      <kwd>universal health care identifier</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>In Sub-Saharan Africa, HIV case-based surveillance (CBS) has not yet been implemented to its full potential yet it is one of the recommended methods for second generation HIV surveillance [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Second generation surveillance systems advanced beyond initial epidemic monitoring approaches that focused on aggregate numbers to use of individual-level clinical data. Within CBS, individual patient demographic attributes can be linked to key clinical events over time allowing for individual tracking. Hence, HIV cases are tracked from (1) diagnosis, (2) linkage to care, (3) antiretroviral treatment (ART), (4) viral suppression, and (5) other outcomes such as retention in care, transfer-out, and loss to follow up or death. This level of follow up is useful for developing epidemiological profiles at the smallest geographical units [<xref ref-type="bibr" rid="ref3">3</xref>], monitoring of the HIV care and treatment clinical cascades, and measuring achievement of the Joint United Nations Program on HIV and AIDS (UNAIDS) Fast-Track 90-90-90 targets [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>Case-based surveillance has advantages over aggregate data reporting systems since it uses individual-level data, allowing for better tracking of treatment course and outcomes. Case-based surveillance can also more accurately show trends and event sequences in the HIV epidemic, for example, trends of time to linkage to treatment from HIV testing or even changes in the clinical cascade over time [<xref ref-type="bibr" rid="ref5">5</xref>]. Though CBS has been shown to be feasible in low resource settings [<xref ref-type="bibr" rid="ref6">6</xref>], accuracy in CBS is contingent upon unique patient identification and correct record linkage from HIV diagnosis through the treatment course, due to the longitudinal nature of HIV care and multiplicity of data sources and care settings. Moreover, record linkage is useful for attaching records to a residence and geographic locality for example, in demographic and health surveillance systems where individuals are tracked routinely in their households [<xref ref-type="bibr" rid="ref7">7</xref>], for data aggregation, and to facilitate correct assessment of program coverage.</p>
      <p>There are 2 broad approaches to matching and records linking by using personally identifiable information (demographic data matching) and using a universal health care identifier (UHID) assigned to uniquely identify persons within a health care setting. Some of the earlier use cases for persons matching include immunization programs [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], and in other settings where unique identification is important such as a national census [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Though less common in settings such as HIV care and treatment programs, unique patient identification has recently and increasingly become important as patient volume grows in these settings. In HIV care and treatment, patient volume continually increases and so does the need for electronic medical records (EMRs). There are commensurate benefits of EMRs over paper records such as improved patient care coordination and clinical decision support [<xref ref-type="bibr" rid="ref12">12</xref>]. Electronic medical records improve the capture of patient identifiers including UHIDs needed for longitudinal patient follow up. The utility of UHID for longitudinal follow up of patients has been demonstrated through correcting misclassification of the final patient outcomes such as loss to follow up in highly mobile populations. For example, in South Africa, a study among postpartum women found that a third may be misclassified as having been lost to care [<xref ref-type="bibr" rid="ref13">13</xref>]. As a chronic condition, HIV care entails the use of HIV services by patients at multiple locations over a lifetime. 
Additionally, individuals may get an HIV diagnosis at one facility and choose to engage in HIV care at another location, they may receive a diagnosis in more than one care setting, and patients may move HIV care locations with or without notifying health care staff.</p>
      <p>While UNAIDS recommends patient-centered colocation and integration of services across care settings such as antenatal care, tuberculosis, and HIV [<xref ref-type="bibr" rid="ref4">4</xref>], colocation is not always feasible and hence tracking patients across the cascade of treatment can be difficult without a UHID and reliable EMR. Even when a government identification document is issued at adulthood, use of its unique number for reproductive and health care services is limited by acceptance and excludes younger populations. Additionally, name and location matching may be used where patient details such as names and locator information exist [<xref ref-type="bibr" rid="ref14">14</xref>], but has limited utility in mobile populations. In the absence of a UHID, biometrics such as fingerprints are recommended [<xref ref-type="bibr" rid="ref15">15</xref>] and may be used among HIV infected patients receiving care [<xref ref-type="bibr" rid="ref16">16</xref>]. Other forms of patient identification, for example, the HIV comprehensive care clinic (CCC) medical record number used in Kenya, suffer from low portability since they may not be permanent when a patient reinitiates care in a different facility. Program-identifiers have limited potential for a national surveillance system since they are unique to the issuing facility. Hence, patients may acquire a new identifier when they transfer to a different facility resulting in unlinked data [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      <p>Given the chronic nature of HIV infection, integrating care across multiple service providers is essential. Nonetheless, unique patient identification in HIV programs, especially in Sub-Saharan Africa, is rarely harmonized across service providers [<xref ref-type="bibr" rid="ref18">18</xref>]. Without a unique patient identifier, if name and location data are available, they may be used to link medical records [<xref ref-type="bibr" rid="ref14">14</xref>]. Therefore, demographic data have utility in records linkage. There are 2 types of algorithms for records matching, allowing for subsequent linkage and deduplication. The first is deterministic matching—a stepwise procedure in which sets of rules are used to pair up records based on actual or pseudo-identifiers identifying them as either a match or belonging to different persons. The second is score-based matching which refers to arithmetical models used to classify record pairs based on calculating a string distance measure quantifying how dissimilar 2 strings or words are to one another and applying a decision rule such as a score. The score is then used to determine whether duplicate records belong to the same individual.</p>
      <p>Persons matching using score-based demographic data matching algorithms may be feasible for patient clinical encounter data and surveillance where demographic data are documented. However, there is a lack of data on the utility of score-based demographic data matching methods and how they compare with deterministic matching in low-resource settings including Sub-Saharan Africa. We used data from a pilot of HIV case-based surveillance in Siaya and Kisumu—two high HIV-burden counties in western Kenya—to (1) compare deterministic and score-based patient matching algorithms and (2) propose an efficient algorithm for deduplicating and uniquely identifying HIV cases in CBS data collection and reporting in Kenya and similar settings.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Setting</title>
        <p>This HIV case-based surveillance pilot was conducted between July 2015 and December 2015 in 124 facilities in Kisumu and Siaya counties. The facilities were selected to represent a variety of settings such as levels of care (dispensary, health center, subcounty, and county referral), use of an EMR versus paper records, and size of the patient population. Data were collected retrospectively to allow for at least four months of follow-up time from initial diagnosis, entry into care, or ART initiation within the study period. Data were collected by subcounty AIDS and sexually transmitted infections (STI) coordinators and Kenya Medical Research Institute (KEMRI) surveillance officers, and in some cases, facility staff. Data were entered from paper medical records and registers into the customized data entry platform for cases newly diagnosed or newly enrolled in HIV care from January through June 2015 using Android-based tablets and a standardized HIV case report form. Surveillance officers were trained in data collection using tablets and provided with login credentials. All surveillance officers signed a data confidentiality statement. As collected data contained patient names and other patient identifiers they were encrypted before transmission via a dedicated virtual private network in real-time to a server hosted on the Amazon cloud computing service. The staff at the National AIDS and STI Control Program (NASCOP) managed the data [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <p>A case was defined minimally to include the date of diagnosis, age at diagnosis, gender, first name, and surname. Cases originated from the following 3 scenarios and analytical frameworks relating to the HIV care cascade. The first scenario is within HIV testing services (HTS). This scenario accommodates cases found within the same facility (cases that were tested at the facility and retested at the same facility hence having different dates of diagnosis). It also included cases that moved to a different facility (cases that tested at one facility and retested at a different facility). The second scenario is HTS-care. This accommodated HTS-to-care scenario in which cases were tested and linked within the same facility. It also included HTS-to-care cases that would be tested at one facility and then linked to care in a different facility. These 2 scenarios accounted for movement of persons diagnosed with HIV and accessing care within the same facility and clients that may test at one facility and access care in a different facility. The third scenario is within care scenarios. This included referrals and linkages from one facility to another. Similar to HTS-to-care linkage scenarios some cases had enrolled into care in one facility and throughout care transferred to another facility. However, HTS was not a source of data for the diagnosis information, and hence we did not have any testing location information for these cases.</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>Methods for data collection are described in the HIV case-based surveillance pilot report [<xref ref-type="bibr" rid="ref19">19</xref>]. Briefly, data were extracted prospectively for everyone newly diagnosed or enrolled in care in a given 6-month period in the participating facilities and subsequent updating of sentinel events for those individuals. At the end of the pilot, we had 12,260 records but excluded 100 which had a missing date of diagnosis and 3 which had a missing date of birth before matching (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p>
      </sec>
      <sec>
        <title>Data Preparation and Standardization</title>
        <p>We created analytical groups—also called “blocking”—according to the scenarios described in the study setting before carrying out matching analyses to allow for comparability and faster processing.</p>
        <p>Before carrying out matching processes, we standardized patient identifying fields used in matching. First, all blank spaces, commas, apostrophes, and dashes were stripped from first names, middle names, and surnames. Second, all string fields were converted to lower case. A Soundex [<xref ref-type="bibr" rid="ref20">20</xref>] was created for the first names in all records since the first names are mostly of English origin. Third, we created double metaphone for middle names and surnames. Fourth, the year of birth was standardized to a four-digit number.</p>
        <p>A potential patient identifier for the deduplication process is CCC number which is a unique patient number assigned at first clinical encounter once an HIV-infected patient has gone through triage and is ready for enrolment into a facility-managed HIV program. The CCC number is an 11-character code comprising a 5-digit unique facility code followed by a separator and a 5-digit sequentially facility-assigned unique number. We standardized CCC numbers to consider variations in recording (eg, use of spaces, slashes, dashes, adding leading zeros, and commas).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Number of records used for deterministic and probabilistic matching, HIV case-based surveillance in Kenya (2015).</p>
          </caption>
          <graphic xlink:href="publichealth_v4i4e10436_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Deterministic Matching</title>
        <p>We used the following fields in deterministic matching: (1) the first name, (2) surname, (3) gender, and (4) year of birth. To reduce mismatching due to variation in spellings of English first names, we used Soundex. We then created a “pseudo-unique key” combining the resulting Soundex values as well as gender, surname, and year of birth. The CCC numbers were used to match care records that were missed by using the “pseudo-unique key.”</p>
      </sec>
      <sec>
        <title>Score-Based Matching</title>
        <p>We separated the data according to the “blocking” scenarios described in the deterministic process. These blocking scenarios are necessary so that comparisons are made among potentially related records. We used R (an open-source software) in our study since it provided programming flexibility to implement the matching string preparation and matching process. We created a matching key field by including the data elements (1) first character of gender at birth, (2) Soundex of the first name, (3) secondary double metaphone of middle name, (4) secondary double metaphone of surname, and (5) year of birth. This produced strings such as “fF465aknannk1983,” “fI650aknkannk1990” (where middle name secondary double metaphone was available), and “fG620ans1994” (where secondary double metaphone of the middle name was unavailable). We then implemented Jaro and Jaro-Winkler string matching and Levenshtein and Damerau-Levenshtein string edit distance algorithms in the R stringdist package [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. String score-based matching was conducted using ratios of matching strings, and a penalty was applied for the first 4 characters when Jaro-Winkler algorithm is used as in the formula (<xref ref-type="fig" rid="figure2">Figure 2</xref>) where <italic>d</italic><sub>j</sub> is the Jaro-Winkler distance score, <italic>m</italic> is the number of matching characters, &#124;<italic>s</italic><sub>1</sub>&#124; is length of string 1, &#124;<italic>s</italic><sub>2</sub>&#124; is length of string 2 and <italic>t</italic> is half the total transpositions or the number of matching (but different sequence order) characters divided by 2. String edit distance calculations, on the other hand, quantify how different 2 strings or words are to one another by counting the minimum number of deletions, insertions, substitutions and transposition operations required to transform 1 string into the other. 
Score-based methodologies are based on the Fellegi-Sunter linkage rule that classifies a record pair as matching or nonmatching [<xref ref-type="bibr" rid="ref11">11</xref>]. The score level to determine a match is determined a priori or based on experience by the user and dependent on the setting. For our case, a score of 98% and above was considered sufficient to determine a match. When we implemented the Jaro and Jaro-Winkler methods, we set a standard penalty factor to 0.1. This penalizes matches based on similarity at the beginning of the string to give favorable ratings to strings that match from the beginning for a set prefix length of up to 4 characters according to Winkler and Cohen [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. The penalty factor is added to discount matches that are found based on up to a maximum of first 4 characters since in string writing, the person recording is more likely to make an error after the first 4 characters. We considered the 4 weights (ω) applicable to the Levenshtein and Damerau-Levenshtein methods (1) deletion (ω=0.8), (2) insertion (ω=0.8), (3) substitutions (ω=1), and (4) transposition (ω=0.5). For the Levenshtein method, the penalty for transposition is ignored [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        <p>Due to possibilities of age variations for the same person accessing HTS and care services at differing periods, the numeric comparator age, with a variation of not more than 12 months within identified matches was considered sufficiently close for confirming a match. We compared deterministic and score-based processes for unique case identification regarding the number of matches yielded and the deduplication extent achieved within the scenarios. We also assessed match yield when HTS and HTS-care records were treated as mutually exclusive versus as a combined set. Regardless of approach, total yield was a sum of duplicates from all scenarios.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The Jaro-Winkler equation.</p>
          </caption>
          <graphic xlink:href="publichealth_v4i4e10436_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Postmatch Processing</title>
        <p>Based on the date of HIV diagnosis, we carried out extra steps to determine how to retain unique records after the matching process. If the retained and duplicate records had conflicting dates of diagnosis, we retained the records with the earliest date of diagnosis. For retained records, we maximized completeness of data for all fields by comparing with the duplicate records. Whenever a retained record had missing data that was present in the duplicate record, an append merge was carried out to overwrite missing values with the nonmissing value from the matched record.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Ethical approval was obtained from the KEMRI (SSC #2827) and the Office of the Associate Director for Science, Centers for Disease Control and Prevention (CDC) with tracking #2014-136. Access to data used in these analyses was password protected, and all study coordinators, data abstractors, and analysts signed a confidentiality form.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>HIV Case Records and Demographic Data Variables</title>
        <p>A total of 12,260 records were collected. We excluded 100 (0.8%) records due to missing dates of diagnosis, and 3 (0%) missing the date of birth (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The final data set used for the matching exercise included 12,157 records representing adult and pediatric cases. From these records and before data deduplication, 33.5% (4073/12,157), 9.0% (1091/12,157), and 57.5% (6993/12,157) corresponded to HTS, HTS-care and within care scenarios respectively. In <xref ref-type="table" rid="table1">Table 1</xref>, completeness and uniqueness of variables used to construct score-based matching string are presented. In the entire data set, gender, year of birth, first name and surname were 100% complete while the middle name was missing for 28% of the records. First names were less unique than surnames: 8.2% (1002/12,157) versus 19.1% (2321/12,157). When Soundex was applied to standardize the English first names, 273/12,157 (2.2%) remained unique compared to 1002/12,157 (8.2%) of the original unstandardized format. When secondary double metaphone was applied to standardize the middle and surnames, 3.6% (316/8772) and 3.1% (373/12,157) respectively remained unique compared to 13.1% (1150/8772) and 19.1% (2321/12,157) of the original unstandardized format. The similarity of names varied by setting (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
      </sec>
      <sec>
        <title>Matches Identified</title>
        <p>Out of the 12,260 records, 12,157 (99.2%) were used in the analyses. Using the deterministic method, 67/4073 (1.6%) records were matches in HTS, 164/1091 (15.0%) in HTS-care, and 204/6993 (2.9%) in the care-only scenario. This yielded a total of 435/12,157 (3.6%) matches and 11,722 unique cases across the testing, care, and treatment cascade (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <p>Overall, of the score-based methods, Jaro-Winkler yielded the most duplicate records (686/12,157, 5.6%), Jaro yielded the fewest (546/12,157, 4.5%), and both Levenshtein and Damerau-Levenshtein yielded the same number (563/12,157, 4.6%). Specifically, duplicate records yielded by method were: (1) Jaro 5.7% (234/4073) within HTS, 0.4% (4/1091) in HTS-care, and 4.4% (308/6993) within care, (2) Jaro-Winkler 7.4% (302/4073) within HTS, 0.5% (6/1091) in HTS-care, and 5.4% (378/6993) within care, (3) Levenshtein 6.4% (262/4073) within HTS, 0.4% (4/1091) in HTS-care, and 4.2% (297/6993) within care, and (4) Damerau-Levenshtein 6.4% (262/4073) within HTS, 0.4% (4/1091) in HTS-care, and 4.2% (297/6993) within care.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Completeness and uniqueness of demographic fields used in the matching process for HIV case-based surveillance in Kenya 2015 (N=12,157).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="250"/>
            <col width="250"/>
            <col width="200"/>
            <thead>
              <tr valign="bottom">
                <td>Fields</td>
                <td>Completeness (%)</td>
                <td>Unique<sup>a</sup> (n)</td>
                <td>Out of n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Gender<sup>b</sup></td>
                <td>100</td>
                <td>2</td>
                <td>12,157 (0)</td>
              </tr>
              <tr valign="top">
                <td>Year of birth</td>
                <td>100</td>
                <td>6</td>
                <td>12,157 (0)</td>
              </tr>
              <tr valign="top">
                <td>First name</td>
                <td>100</td>
                <td>1002</td>
                <td>12,157 (8.2)</td>
              </tr>
              <tr valign="top">
                <td>Soundex of first name</td>
                <td>100</td>
                <td>273</td>
                <td>12,157 (2.2)</td>
              </tr>
              <tr valign="top">
                <td>Middle name</td>
                <td>72</td>
                <td>1150</td>
                <td>8772 (13.1)</td>
              </tr>
              <tr valign="top">
                <td>Phonetic middle name<sup>c</sup></td>
                <td>72</td>
                <td>316</td>
                <td>8772 (3.6)</td>
              </tr>
              <tr valign="top">
                <td>Surname</td>
                <td>100</td>
                <td>2321</td>
                <td>12,157 (19.1)</td>
              </tr>
              <tr valign="top">
                <td>Phonetic surname<sup>c</sup></td>
                <td>100</td>
                <td>373</td>
                <td>12,157 (3.1)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>Unique refers to similar occurrences of the field (eg, only two types of gender).</p>
          </fn>  
          <fn id="table1fn2">
            <p><sup>b</sup>Two statuses possible (male or female).</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>Secondary double metaphones for standardizing Kenyan native names.</p>
          </fn></table-wrap-foot>
        </table-wrap>

        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Scenarios in HIV diagnosis, care and treatment cascade, and deduplication yield for HIV case-based surveillance in Kenya 2015.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="290"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Scenarios</td>
                <td>Deterministic matching method, n (%)</td>
                <td colspan="4">Matches identified for each score-based matching algorithm, n (%)</td>
              </tr>
              <tr valign="top">
              <td><break/></td>  
              <td><break/></td>
              <td>Jaro</td>
              <td>Jaro-Winkler</td>
              <td>Levenshtein</td>
              <td>Damerau-Levenshtein</td></tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>All<sup>a</sup> (N=12,157)</td>
                <td>435 (3.6)</td>
                <td>546 (4.5)</td>
                <td>686 (5.6)</td>
                <td>563 (4.6)</td>
                <td>563 (4.6)</td>
              </tr>
              <tr valign="top">
                <td>HTS<sup>b</sup> (n=4073)</td>
                <td>67 (1.6)</td>
                <td>234 (5.7)</td>
                <td>302 (7.4)</td>
                <td>262 (6.4)</td>
                <td>262 (6.4)</td>
              </tr>
              <tr valign="top">
                <td>HTS-care<sup>c</sup> (n=1091)</td>
                <td>164 (15.0)</td>
                <td>4 (0.4)</td>
                <td>6 (0.5)</td>
                <td>4 (0.4)</td>
                <td>4 (0.4)</td>
              </tr>
              <tr valign="top">
                <td>Care only<sup>d</sup> (n=6993)</td>
                <td>204 (2.9)</td>
                <td>308 (4.4)</td>
                <td>378 (5.4)</td>
                <td>297 (4.2)</td>
                <td>297 (4.2)</td>
              </tr>
              <tr valign="top">
                <td>Unique<sup>e</sup></td>
                <td>11,722 (96.4)</td>
                <td>11,611 (95.5)</td>
                <td>11,471 (94.4)</td>
                <td>11,594 (95.4)</td>
                <td>11,594 (95.4)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Summed up for all the scenarios.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>HTS: HIV testing services (records where data were primarily from the HTS setting and the records contained HIV diagnosis data only).</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>HTS-care (records that contained both HTS and HIV care information).</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>Care only (records from primarily HIV care with no additional HTS records).</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>Unique records after deduplication.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        
      </sec>
      <sec>
        <title>Jaro-Winkler Yield for Mutually Exclusive and Combined Data Sets</title>
        <p>A comparison of Jaro-Winkler yield for mutually exclusive and data sets that were combined across the scenarios is presented in <xref ref-type="fig" rid="figure3">Figure 3</xref>. When scenarios were treated as mutually exclusive, Jaro-Winkler score-based matching algorithm yielded 7.4% (302/4073) matches in the HTS scenario and 0.5% (6/1091) in the HTS-care scenario, that is, 6.0% (308/5164) across the 2 scenarios, compared to a higher yield of 7.1% (368/5164) when the 2 scenarios were treated as 1 block.</p>
      </sec>
      <sec>
        <title>Steps for Score-Based Matching and Considerations</title>
        <p>Based on the outcomes of the score-based matching process, we propose a procedure comprising 7 steps that is easy to apply to quickly match and link unique cases across HIV care settings (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). To decide whether or not to use demographic data matching, we propose a decision model (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p>
        
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Percent match yield by blocking scenarios using Jaro-Winkler score-based matching, HIV case-based surveillance in Kenya (2015). HTS: HIV testing services; HTS-Care: records from HTS-care scenarios; Care only: records from care scenarios only. A ∩ B indicates that the intersection of HTS and HTS-care records yields 368 matches (18% more matches than in mutually exclusive matching).</p>
          </caption>
          <graphic xlink:href="publichealth_v4i4e10436_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        
        <boxed-text id="box1" position="float">
          <title>Expandable simplified steps used in the demographic data matching process.</title>
          <p><bold>Step 1: Select data sources</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>Select data sources with common fields</p>
            </list-item>
            <list-item>
              <p>If additional sources are available, add to the list</p>
            </list-item>
          </list>
          <p><bold>Step 2: Prepare the data</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>Cleaning and coding</p>
            </list-item>
            <list-item>
              <p>Standardizing fields</p>
            </list-item>
          </list>
          <p><bold>Step 3: Create a match-string</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>Ensure mutually exclusive blocks</p>
            </list-item>
            <list-item>
              <p>Test internal validity</p>
            </list-item>
          </list>
          <p><bold>Step 4: Create blocks</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>Ensure mutually exclusive blocks</p>
            </list-item>
            <list-item>
              <p>Test internal validity</p>
            </list-item>
          </list>
          <p><bold>Step 5: Run the matching algorithm</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>Apply a match rate of ≥98%</p>
            </list-item>
            <list-item>
              <p>Test in a small identified data set and adjust the match rate</p>
            </list-item>
          </list>
          <p><bold>Step 6: Merge the data</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>Update records that need an update</p>
            </list-item>
            <list-item>
              <p>Create a master patient index</p>
            </list-item>
          </list>
          <p><bold>Step 7: Adjudication, quality checks, and use cases</bold></p>
          <list list-type="bullet">
            <list-item>
              <p>For care coordination, recheck that the matches are correct</p>
            </list-item>
            <list-item>
              <p>For surveillance and indicator reporting, use a combination of the matched but deduplicated records and the unmatched records</p>
            </list-item>
          </list>
        </boxed-text>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Decision model for when to use score-based matching.</p>
          </caption>
          <graphic xlink:href="publichealth_v4i4e10436_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Universal health care identifiers are recommended and ideal for patient-centered monitoring and CBS [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. However, in low resource settings, their use is limited. In the interim, demographic data score-based matching algorithms can play an important role in improving the quality of CBS data as well as patient-centered care. We have demonstrated that score-based methods succeeded in matching patients and identified more matches than the deterministic process. It is possible to match cases, merge sentinel events, and enhance the completeness of individual deduplicated data using this process. Consequently, this improves accuracy in CBS and other longitudinal encounter data. The process also has a dual utility of allowing better care coordination and patient management at the facility level and improved HIV surveillance at a higher subnational or national level. The matching process can be inbuilt in EMRs and at patient registries to allow for lookup of already registered patients at the facility level. This may improve processes, patient flow and avoid unnecessary double entry. We also demonstrate that we can do enough matching in the absence of a UHID to move ahead with CBS implementation in low-resource settings such as Kenya. As such, lack of a UHID should not stifle movement towards the use of CBS.</p>
      </sec>
      <sec>
        <title>Score-Based Matching Yield</title>
        <p>Our study compared 4 variants of score-based string-distance matching methods. The Jaro-Winkler distance method was found to perform better in score-based matching since it gave the best yield while considering common spelling mistakes and logical combination of demographic fields. In developed countries, it has been shown that about 5% to 10% of medical records may be duplicate [<xref ref-type="bibr" rid="ref26">26</xref>], which compares well with our results. Jaro-Winkler has been proposed as a method over other string-matching algorithms since it was designed with relatively short strings in mind [<xref ref-type="bibr" rid="ref21">21</xref>], hence may be best suited to our setting. In addition, it works well when the name beginnings are the same [<xref ref-type="bibr" rid="ref27">27</xref>]. For that reason, we standardized beginning of the match strings by using a Soundex of the English names and using secondary double metaphone of middle and surnames. Further, a decision was made to add the first character of gender at birth to the beginning of the string to improve the accuracy of the matching score.</p>
      </sec>
      <sec>
        <title>Application Considerations</title>
        <p>Although we used R in our analyses and matching process, open source software such as CDC Registry Plus Linkplus [<xref ref-type="bibr" rid="ref28">28</xref>], which was originally developed for cancer registries has been explored in low-resource HIV care settings for example in Haiti [<xref ref-type="bibr" rid="ref29">29</xref>]. Other Web-based applications that have utility for fuzzy matching and record cleaning, for example, Freely Extensible Biomedical Record Linkage [<xref ref-type="bibr" rid="ref6">6</xref>], may have potential. However, post-match processing is necessary to achieve a high degree of true matches. A certain degree of human adjudication may be necessary especially when testing the algorithms. Users of off-the-shelf solutions such as Linkplus should take caution since many mismatches may be likely to be true matches [<xref ref-type="bibr" rid="ref29">29</xref>]. The use of current English name-based Soundex algorithms is not appropriate for Kenyan names. In creating unique identifiers that contain a Soundex component, variations of the first name can yield a different Soundex since the first character is always part of the Soundex [<xref ref-type="bibr" rid="ref20">20</xref>]. A visual inspection of matches based solely on Soundex of first and surname showed a high false-positive rate. Research on how to construct a Soundex algorithm for Kenyan names may be useful as has been successfully done in Japan, India, and South Africa [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. We determined that using a double metaphone had discriminatory power for Kenyan names and hence we used it for middle and surnames.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study has several limitations. First, the choice of a combination of several fields for a concatenated “pseudo-unique key” may not be optimal. However, we developed the matching string taking advantage of existing identifiers in our data. First names in Kenya are usually English baptismal or anglicized names. We took advantage of this to standardize names that are misspelled using Soundex. Other challenges include manual transcription errors during patient transfers and assigning of new numbers for transfer-in patients. Despite these limitations, we were able to merge the cases based on the names, gender, date of birth and CCC number in the within care scenario and hence identify potential matches in the deterministic process. Finally, many studies have applied common measures of validity such as positive predictive value, sensitivity, and specificity [<xref ref-type="bibr" rid="ref33">33</xref>]. Unlike those studies, we did not have a gold standard for comparison in the pilot.</p>
        <p>The choice of which string distance score-based algorithm to use largely depends on the nature of the match strings and the nature of typographic errors [<xref ref-type="bibr" rid="ref21">21</xref>]. Choice of the matching string is therefore important. For example, deterministic matching yielded more duplicates for the HTS to care scenario (15%) compared to 0.4% to 0.5% across the score-based methods. This may be because a rigorous manual assessment of possible matches was done using the CCC numbers such that matches within the HTS to care scenario were more efficiently captured. Minimalistic demographic fields were used in score-based matching across all scenarios, and the CCC number was not included in the process.</p>
      </sec>
      <sec>
        <title>Conclusions and Recommendations</title>
        <p>There has been an ongoing discussion and suggested approaches for countries to consider in developing UHIDs [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. If and when implemented, UHIDs would have the highest potential to mitigate challenges with unique identification and record linkage for an expanded national CBS system. This benefit extends to other health sectors as countries move towards universal health care. The recent World Health Organization guidelines for patient-centered monitoring advocate for using unique patient identifiers instead of names [<xref ref-type="bibr" rid="ref25">25</xref>]. However, where there is no UHID, an algorithm for deduplicating unique patients’ records based on available demographic data is necessary and feasible. Such an algorithm would improve monitoring of the HIV epidemic including the UNAIDS Fast-Track 90-90-90 targets.</p>
        <p>We propose a stepwise process that builds up from first identifying data sources and blocking scenarios. This should be followed by an examination of the data quality using completeness as a measure coupled with quality improvement measures through routine data quality audits. The next step involves developing a matching key, lower-level deduplication and finally cross-examination, validation and sending of CBS data to the national level for surveillance. Although validation of the score-based approach is a necessary extra step, this may be best done with data sets from settings where a gold standard is available such as those utilizing biometric finger vein technologies for patient identification. Given that these settings are rare, we suggest that programs identify a percentage that best suits their setting and resources for validation purposes. A decision model such as the one presented in <xref ref-type="fig" rid="figure4">Figure 4</xref> may help programs to decide whether or not to use demographic data matching. Comparing score-based matches to gold standard data in Kenya and similar settings offers an opportunity for future work in search of alternatives for patient matching. In the meantime, score-based demographic data matching has utility for improving the quality of data in monitoring the 90-90-90 cascade and in other health care settings where patients are longitudinally followed.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ART</term>
          <def>
            <p>antiretroviral treatment</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CBS</term>
          <def>
            <p>case-based surveillance</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CCC</term>
          <def>
            <p>comprehensive care clinic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HTS</term>
          <def>
            <p>HIV testing services</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">KEMRI</term>
          <def>
            <p>Kenya Medical Research Institute</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NASCOP</term>
          <def>
            <p>National AIDS and STI Control Program</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">STI</term>
          <def>
            <p>sexually transmitted infection</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">UHID</term>
          <def>
            <p>universal health care identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">UNAIDS</term>
          <def>
            <p>Joint United Nations Program on HIV and AIDS</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We acknowledge the Kisumu and Siaya county and subcounty HIV/AIDS and STI coordinators and the KEMRI surveillance officers who helped with data collection.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>AW conceived the idea for this manuscript and prepared the concept, data analyses and wrote the first and subsequent drafts of the manuscript. AN helped with data analyses. PWY, AN, TAK, WW, KM, and EZG provided extensive comments on the concept and manuscript drafts. LMN, JO, PWY, and EJM provided insights on policy implications and recommendations. RH and KM supervised the data abstraction process. All authors read the manuscript, provided feedback, and approved the final version.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
        <source>Guidelines for national human immunodeficiency virus case surveillance, including monitoring for human immunodeficiency virus infection and acquired immunodeficiency syndrome</source>  
        <year>1999</year>  
        <month>12</month>  
        <day>10</day>  
        <access-date>2018-03-16</access-date>
        <publisher-loc>Atlanta</publisher-loc>
        <publisher-name>Centers for Disease Control and Prevention</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/rr4813a1.htm">https://www.cdc.gov/mmwr/preview/mmwrhtml/rr4813a1.htm</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="71lWESR9h"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>UNAIDS/WHO Working Group on Global HIV/AIDS and STI Surveillance</collab>
        </person-group>
        <source>Guidelines for Second Generation HIV Surveillance-an update: Know your epidemic</source>  
        <year>2011</year>  
        <access-date>2018-10-29</access-date>
        <publisher-loc>Geneva</publisher-loc>
        <publisher-name>World Health Organization and Joint United Nations Programme on HIV/AIDS</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.who.int/hiv/pub/surveillance/en/cds_edc_2000_5.pdf">http://www.who.int/hiv/pub/surveillance/en/cds_edc_2000_5.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="73Wjr8x8U"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
        <source>Joint United Nations Programme on HIV/AIDS (UNAIDS)</source>  
        <year>2015</year>  
        <access-date>2018-03-16</access-date>
        <comment>On the Fast-Track to end AIDS by 2030: Focus on location and population 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.unaids.org/sites/default/files/media_asset/WAD2015_report_en_part01.pdf">http://www.unaids.org/sites/default/files/media_asset/WAD2015_report_en_part01.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lRSNIXx"/></comment> </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
        <source>Joint United Nations Programme on HIV/AIDS (UNAIDS)</source>  
        <access-date>2018-03-16</access-date>
        <comment>UNAIDS 2016-2021 Strategy: On the Fast-Track to end AIDS, 2015 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.unaids.org/sites/default/files/media_asset/20151027_UNAIDS_PCB37_15_18_EN_rev1.pdf">http://www.unaids.org/sites/default/files/media_asset/20151027_UNAIDS_PCB37_15_18_EN_rev1.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lS2dPF9"/></comment> </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rehle</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Lazzari</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Dallabetta</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Asamoah-Odei</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Second-generation HIV surveillance: better data for decision-making</article-title>
        <source>Bull World Health Organ</source>  
        <year>2004</year>  
        <month>02</month>  
        <volume>82</volume>  
        <issue>2</issue>  
        <fpage>121</fpage>  
        <lpage>7</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.scielosp.org/scielo.php?script=sci_arttext&#38;pid=S0042-96862004000200009&#38;lng=en&#38;nrm=iso&#38;tlng=en"/>
        </comment>  
        <pub-id pub-id-type="medline">15042234</pub-id>
        <pub-id pub-id-type="pii">S0042-96862004000200009</pub-id>
        <pub-id pub-id-type="pmcid">PMC2585900</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Harklerode</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Schwarcz</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hargreaves</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Boulle</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Todd</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Xueref</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Rice</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Feasibility of Establishing HIV Case-Based Surveillance to Measure Progress Along the Health Sector Cascade: Situational Assessments in Tanzania, South Africa, and Kenya</article-title>
        <source>JMIR Public Health Surveill</source>  
        <year>2017</year>  
        <month>07</month>  
        <day>10</day>  
        <volume>3</volume>  
        <issue>3</issue>  
        <fpage>e44</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://publichealth.jmir.org/2017/3/e44/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/publichealth.7610</pub-id>
        <pub-id pub-id-type="medline">28694240</pub-id>
        <pub-id pub-id-type="pii">v3i3e44</pub-id>
        <pub-id pub-id-type="pmcid">PMC5525003</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Christen</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Churches</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <source>A probabilistic deduplication, record linkage and geocoding system</source>  
        <year>2005</year>  
        <access-date>2018-03-16</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/70a4/d632a60edbcec6e7cc787812e7e425995552.pdf">https://pdfs.semanticscholar.org/70a4/d632a60edbcec6e7cc787812e7e425995552.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="71lSSfVr5"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
        <source>National Center for Immunization and Respiratory Disease (NCIRD)</source>  
        <year>2013</year>  
        <access-date>2018-03-16</access-date>
        <comment>Immunization information systems patient-level de-duplication best practices 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/vaccines/programs/iis/interop-proj/downloads/de-duplication.pdf">https://www.cdc.gov/vaccines/programs/iis/interop-proj/downloads/de-duplication.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lSXZIMC"/></comment> </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Angeloni</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Probabilistic Record Matching and Deduplication Using Open Source Software</article-title>
        <year>2004</year>  
        <conf-name>Immunization Registry Conference</conf-name>
        <conf-date>October 19, 2004</conf-date>
        <conf-loc>Atlanta</conf-loc></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jaro</surname>
            <given-names>MA</given-names>
          </name>
        </person-group>
        <article-title>Advances in Record-Linkage Methodology as Applied to Matching the 1985 Census of Tampa, Florida</article-title>
        <source>Journal of the American Statistical Association</source>  
        <year>1989</year>  
        <month>06</month>  
        <volume>84</volume>  
        <issue>406</issue>  
        <fpage>414</fpage>  
        <lpage>420</lpage>  
        <pub-id pub-id-type="doi">10.1080/01621459.1989.10478785</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Winkler</surname>
            <given-names>WE</given-names>
          </name>
          <name name-style="western">
            <surname>Thibaudeau</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <source>Research Report</source>  
        <access-date>2018-03-16</access-date>
        <comment>An application of the Fellegi-Sunter model of record linkage to the 1990 US decennial census 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/srd/papers/pdf/rr91-9.pdf">https://www.census.gov/srd/papers/pdf/rr91-9.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lSnnTnB"/></comment> </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Oluoch</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Katana</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Ssempijja</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Kwaro</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Langat</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Kimanga</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Okeyo</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Abu-Hanna</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>de Keizer</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Electronic medical record systems are associated with appropriate placement of HIV patients on antiretroviral therapy in rural health facilities in Kenya: a retrospective pre-post study</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2014</year>  
        <month>11</month>  
        <volume>21</volume>  
        <issue>6</issue>  
        <fpage>1009</fpage>  
        <lpage>14</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24914014"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2013-002447</pub-id>
        <pub-id pub-id-type="medline">24914014</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2013-002447</pub-id>
        <pub-id pub-id-type="pmcid">PMC4215039</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Clouse</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Vermund</surname>
            <given-names>SH</given-names>
          </name>
          <name name-style="western">
            <surname>Maskew</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Lurie</surname>
            <given-names>MN</given-names>
          </name>
          <name name-style="western">
            <surname>MacLeod</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Malete</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Carmona</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sherman</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Fox</surname>
            <given-names>MP</given-names>
          </name>
        </person-group>
        <article-title>Mobility and Clinic Switching Among Postpartum Women Considered Lost to HIV Care in South Africa</article-title>
        <source>J Acquir Immune Defic Syndr</source>  
        <year>2017</year>  
        <month>04</month>  
        <day>01</day>  
        <volume>74</volume>  
        <issue>4</issue>  
        <fpage>383</fpage>  
        <lpage>389</lpage>  
        <pub-id pub-id-type="doi">10.1097/QAI.0000000000001284</pub-id>
        <pub-id pub-id-type="medline">28225717</pub-id>
        <pub-id pub-id-type="pii">00126334-201704010-00006</pub-id>
        <pub-id pub-id-type="pmcid">PMC5324708</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dusetzina</surname>
            <given-names>SB</given-names>
          </name>
          <name name-style="western">
            <surname>Tyree</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Meyer</surname>
            <given-names>A-M</given-names>
          </name>
          <name name-style="western">
            <surname>Meyer</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Green</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Carpenter</surname>
            <given-names>WR</given-names>
          </name>
        </person-group>
        <source>Linking Data for Health Services Research: A Framework and Instructional Guide</source>  
        <year>2014</year>  
        <access-date>2018-03-16</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK253313/pdf/Bookshelf_NBK253313.pdf">https://www.ncbi.nlm.nih.gov/books/NBK253313/pdf/Bookshelf_NBK253313.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="71lT6Ilex"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kabudula</surname>
            <given-names>CW</given-names>
          </name>
          <name name-style="western">
            <surname>Clark</surname>
            <given-names>BD</given-names>
          </name>
          <name name-style="western">
            <surname>Gómez-Olivé</surname>
            <given-names>FX</given-names>
          </name>
          <name name-style="western">
            <surname>Tollman</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Menken</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Reniers</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>The promise of record linkage for assessing the uptake of health services in resource constrained settings: a pilot study from South Africa</article-title>
        <source>BMC Med Res Methodol</source>  
        <year>2014</year>  
        <month>05</month>  
        <day>24</day>  
        <volume>14</volume>  
        <fpage>71</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-14-71"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2288-14-71</pub-id>
        <pub-id pub-id-type="medline">24884457</pub-id>
        <pub-id pub-id-type="pii">1471-2288-14-71</pub-id>
        <pub-id pub-id-type="pmcid">PMC4041350</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Otieno</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>The Star, Kenya</source>  
        <year>2014</year>  
        <month>09</month>  
        <day>11</day>  
        <access-date>2018-03-16</access-date>
        <comment>HIV patients to be recorded biometrically 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.the-star.co.ke/news/2014/09/11/hiv-patients-to-be-recorded-biometrically_c1002083">https://www.the-star.co.ke/news/2014/09/11/hiv-patients-to-be-recorded-biometrically_c1002083</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lTJgeZi"/></comment> </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Beck</surname>
            <given-names>EJ</given-names>
          </name>
          <name name-style="western">
            <surname>Shields</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Tanna</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Henning</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>de Vega</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Andrews</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Boucher</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Benting</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Garcia-Calleja</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Cutler</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ewing</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Kijsanayotin</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Kujinga</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Mahy</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Makofane</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Marsh</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Nacheeva</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Rangana</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Vega</surname>
            <given-names>MFR</given-names>
          </name>
          <name name-style="western">
            <surname>Sabin</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Varetska</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Macharia Wanyee</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Watiti</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Nunez</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Ghys</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Low-Beer</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Developing and implementing national health identifiers in resource limited countries: why, what, who, when and how?</article-title>
        <source>Glob Health Action</source>  
        <year>2018</year>  
        <month>03</month>  
        <volume>11</volume>  
        <issue>1</issue>  
        <fpage>1440782</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29502484"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1080/16549716.2018.1440782</pub-id>
        <pub-id pub-id-type="medline">29502484</pub-id>
        <pub-id pub-id-type="pmcid">PMC5912435</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>World Health Organization</collab>
        </person-group>
        <source>IMAI and IMCI tools</source>  
        <year>2007</year>  
        <access-date>2018-03-16</access-date>
        <publisher-loc>Geneva</publisher-loc>
        <publisher-name>WHO</publisher-name>
        <comment>Briefing Package Integrated Approach to HIV Prevention, Care and Treatment 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.who.int/hiv/capacity/ImaiBriefingStrategyAug2007Sm.pdf">http://www.who.int/hiv/capacity/ImaiBriefingStrategyAug2007Sm.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lU5fPcG"/></comment> </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <collab>National AIDS and STI Control Programme (NASCOP)</collab>
        </person-group>
        <article-title>Case Based Surveillance of HIV in Kenya: Results of a Pilot Conducted in Kisumu and Siaya Counties, 2015</article-title>
        <source>Case Based Surveillance of HIV in Kenya</source>  
        <year>2016</year>  
        <publisher-loc>Nairobi</publisher-loc>
        <publisher-name>Ministry of Health</publisher-name></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
        <source>The US National Archives</source>  
        <year>2007</year>  
        <access-date>2018-03-16</access-date>
        <comment>The Soundex Indexing System 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.archives.gov/research/census/soundex.html">https://www.archives.gov/research/census/soundex.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lUHWSth"/></comment> </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>van der Loo</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>The R Journal</source>  
        <access-date>2018-03-16</access-date>
        <comment>The stringdist Package for Approximate String Matching 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://journal.r-project.org/archive/2014-1/loo.pdf">https://journal.r-project.org/archive/2014-1/loo.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lUUITtB"/></comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>van der Loo</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>van der Laan</surname>
            <given-names>J</given-names>
          </name>
          <collab>R Core Team</collab>
          <name name-style="western">
            <surname>Logan</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Muir</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <year>2018</year>  
        <access-date>2018-03-16</access-date>
        <comment>Package “stringdist” 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/stringdist/stringdist.pdf">https://cran.r-project.org/web/packages/stringdist/stringdist.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lUZpzqQ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Ravikumar</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Fienberg</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <source>Proceedings of IJCAI-03 Workshop on Information Integration on the Web</source>  
        <year>2003</year>  
        <access-date>2018-10-29</access-date>
        <comment>A Comparison of String Distance Metrics for Name-Matching Tasks 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cs.cmu.edu/~wcohen/postscript/ijcai-ws-2003.pdf">http://www.cs.cmu.edu/~wcohen/postscript/ijcai-ws-2003.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="73WmzjlPx"/></comment> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>World Health Organization</collab>
        </person-group>
        <source>Adapting and Implementing New Recommendations on HIV Case Surveillance</source>  
        <year>2017</year>  
        <access-date>2018-10-29</access-date>
        <publisher-loc>Geneva, Switzerland</publisher-loc>
        <publisher-name>WHO</publisher-name>
        <comment>Consolidated Guidelines on Person-Centred HIV Patient Monitoring and Case Surveillance 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.who.int/sorry/">http://www.who.int/sorry/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="73WnCHvCz"/></comment> </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
        <source>Consolidated Guidelines on Person-Centred HIV Patient Monitoring and Case Surveillance</source>  
        <year>2017</year>  
        <access-date>2018-03-16</access-date>
        <publisher-loc>Geneva, Switzerland</publisher-loc>
        <publisher-name>World Health Organization</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://apps.who.int/iris/bitstream/handle/10665/255702/9789241512633-eng.pdf?sequence=1">http://apps.who.int/iris/bitstream/handle/10665/255702/9789241512633-eng.pdf?sequence=1</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="71lUjKwmW"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fox</surname>
            <given-names>Leslie Ann</given-names>
          </name>
          <name name-style="western">
            <surname>Sheridan</surname>
            <given-names>Patty Thierry</given-names>
          </name>
        </person-group>
        <source>Advance healthcare network</source>  
        <year>2004</year>  
        <access-date>2018-03-16</access-date>
        <comment>EHR Preparation: Building Your MPI Game Plan 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://health-information.advanceweb.com/Article/EHR-Preparation-Building-Your-MPI-Game-Plan-1.aspx">http://health-information.advanceweb.com/Article/EHR-Preparation-Building-Your-MPI-Game-Plan-1.aspx</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lUsmbwJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Christen</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>A Comparison of Personal Name Matching: Techniques and Practical Issues</article-title>
        <source>Data Mining Workshops</source>  
        <year>2006</year>  
        <conf-name>Sixth IEEE International Conference on Data Mining - Workshops (ICDMW'06)</conf-name>
        <conf-date>2006</conf-date>
        <conf-loc>Canberra</conf-loc>
        <fpage>290</fpage>  
        <lpage>294</lpage>  
        <pub-id pub-id-type="doi">10.1109/ICDMW.2006.2</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>CDC</collab>
        </person-group>
        <source>National Program of Cancer Registries (NPCR)</source>  
        <year>2007</year>  
        <access-date>2018-03-16</access-date>
        <comment>Registry Plus™ Link Plus 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/cancer/npcr/tools/registryplus/lp.htm">https://www.cdc.gov/cancer/npcr/tools/registryplus/lp.htm</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lV4QdCA"/></comment> </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Delcher</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Puttkammer</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Arnoux</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Kesner</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Griswold</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Zaidi</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Anthony</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Joseph</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Marston</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Validating Procedures used to Identify Duplicate Reports in Haiti's National HIV/AIDS Case Surveillance System</article-title>
        <source>J Registry Manag</source>  
        <year>2016</year>  
        <volume>43</volume>  
        <issue>1</issue>  
        <fpage>10</fpage>  
        <lpage>15</lpage>  
        <pub-id pub-id-type="medline">27195993</pub-id>
        <pub-id pub-id-type="pmcid">PMC5222994</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Baruah</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Kakoti Mahanta</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Design and Development of Soundex for Assamese Language</article-title>
        <source>IJCA</source>  
        <year>2015</year>  
        <month>05</month>  
        <day>20</day>  
        <volume>117</volume>  
        <issue>9</issue>  
        <fpage>9</fpage>  
        <lpage>12</lpage>  
        <pub-id pub-id-type="doi">10.5120/20581-3000</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
        <name name-style="western">
        <surname>Shah</surname>  
        <given-names>R</given-names></name>  
        <name name-style="western">
        <surname>Kumar Singh</surname>  
        <given-names>D</given-names></name> </person-group>
        <article-title>Improvement of Soundex Algorithm for Indian Language Based on Phonetic Matching</article-title>
        <source>Int J Comput Sci Eng Appl</source>  
        <year>2014</year>  
        <month>06</month>  
        <day>30</day>
        <volume>4</volume>  
        <issue>3</issue>  
        <fpage>31</fpage>  
        <lpage>39</lpage>  
        <pub-id pub-id-type="doi">10.5121/ijcsea.2014.4303</pub-id></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ndyalivana</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <source>Development of Soundex Algorithm for IsiXhosa Language</source>  
        <year>2017</year>  
        <month>10</month>  
        <day>17</day>  
        <access-date>2018-03-16</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/273455247_Development_of_Soundex_algorithm_for_isiXhosa_language">https://www.researchgate.net/publication/273455247_Development_of_Soundex_algorithm_for_isiXhosa_language</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="71lVOEhgO"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pinto da Silveira</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Artmann</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Acurácia em métodos de relacionamento probabilístico de bases de dados em saúde: revisão sistemática</article-title>
        <source>Rev Saúde Pública</source>  
        <year>2009</year>  
        <month>10</month>  
        <volume>43</volume>  
        <issue>5</issue>  
        <fpage>875</fpage>  
        <lpage>882</lpage>  
        <pub-id pub-id-type="doi">10.1590/S0034-89102009005000060</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
        <source>Joint United Nations Programme on HIV/AIDS (UNAIDS)</source>  
        <year>2014</year>  
        <access-date>2018-03-16</access-date>
        <comment>Considerations and Guidance for Countries Adopting National Health Identifiers 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.unaids.org/sites/default/files/media_asset/JC2640_nationalhealthidentifiers_en.pdf">http://www.unaids.org/sites/default/files/media_asset/JC2640_nationalhealthidentifiers_en.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="71lW5xwhH"/></comment> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
