<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i5e30426</article-id>
      <article-id pub-id-type="pmid">35608886</article-id>
      <article-id pub-id-type="doi">10.2196/30426</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying Cases of Shoulder Injury Related to Vaccine Administration (SIRVA) in the United States: Development and Validation of a Natural Language Processing Method</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Sanchez</surname>
            <given-names>Travis</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Xie</surname>
            <given-names>Fagen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Wei</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>Chengyi</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Research and Evaluation</institution>
            <institution>Kaiser Permanente Southern California</institution>
            <addr-line>100 S Los Robles Ave</addr-line>
            <addr-line>2nd floor</addr-line>
            <addr-line>Pasadena, CA, 91101</addr-line>
            <country>United States</country>
            <phone>1 626 986 8665</phone>
            <email>chengyi.x.zheng@kp.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4194-0029</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Duffy</surname>
            <given-names>Jonathan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8060-0940</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>In-Lu Amy</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7760-7068</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Sy</surname>
            <given-names>Lina S</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2762-9266</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Navarro</surname>
            <given-names>Ronald A</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1869-6440</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Sunhea S</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0228-9341</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Ryan</surname>
            <given-names>Denison S</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8440-3859</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Wansu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3486-7468</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Qian</surname>
            <given-names>Lei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8001-3992</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Mercado</surname>
            <given-names>Cheryl</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2981-5997</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Jacobsen</surname>
            <given-names>Steven J</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8174-8533</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Research and Evaluation</institution>
        <institution>Kaiser Permanente Southern California</institution>
        <addr-line>Pasadena, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Immunization Safety Office</institution>
        <institution>Centers for Disease Control and Prevention</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Kaiser Permanente South Bay Medical Center</institution>
        <addr-line>Harbor City, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Chengyi Zheng <email>chengyi.x.zheng@kp.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>24</day>
        <month>5</month>
        <year>2022</year>
      </pub-date>
      <volume>8</volume>
      <issue>5</issue>
      <elocation-id>e30426</elocation-id>
      <history>
        <date date-type="received">
          <day>13</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>24</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>26</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Chengyi Zheng, Jonathan Duffy, In-Lu Amy Liu, Lina S Sy, Ronald A Navarro, Sunhea S Kim, Denison S Ryan, Wansu Chen, Lei Qian, Cheryl Mercado, Steven J Jacobsen. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 24.05.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2022/5/e30426" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Shoulder injury related to vaccine administration (SIRVA) accounts for more than half of all claims received by the National Vaccine Injury Compensation Program. However, due to the difficulty of finding SIRVA cases in large health care databases, population-based studies are scarce.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The goal of the research was to develop a natural language processing (NLP) method to identify SIRVA cases from clinical notes.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We conducted the study among members of a large integrated health care organization who were vaccinated between April 1, 2016, and December 31, 2017, and had subsequent diagnosis codes indicative of shoulder injury. Based on a training data set with a chart review reference standard of 164 cases, we developed an NLP algorithm to extract shoulder disorder information, including prior vaccination, anatomic location, temporality, and causality. The algorithm identified 3 groups of positive SIRVA cases (definite, probable, and possible) based on the strength of evidence. We compared NLP results to a chart review reference standard of 100 vaccinated cases. We then applied the final automated NLP algorithm to a broader cohort of vaccinated persons with a shoulder injury diagnosis code and performed manual chart confirmation on a random sample of NLP-identified definite cases and all NLP-identified probable and possible cases.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In the validation sample, the NLP algorithm had 100% accuracy for identifying 4 SIRVA cases and 96 cases without SIRVA. In the broader cohort of 53,585 vaccinations, the NLP algorithm identified 291 definite, 124 probable, and 52 possible SIRVA cases. The chart-confirmation rates for these groups were 95.5% (278/291), 67.7% (84/124), and 17.3% (9/52), respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The algorithm performed with high sensitivity and reasonable specificity in identifying positive SIRVA cases. The NLP algorithm can potentially be used in future population-based studies to identify this rare adverse event, avoiding labor-intensive chart review validation.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>health</kwd>
        <kwd>informatics</kwd>
        <kwd>shoulder injury related to vaccine administration</kwd>
        <kwd>SIRVA</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>causal relation</kwd>
        <kwd>temporal relation</kwd>
        <kwd>pharmacovigilance</kwd>
        <kwd>electronic health records</kwd>
        <kwd>EHR</kwd>
        <kwd>vaccine safety</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>big data</kwd>
        <kwd>population health</kwd>
        <kwd>real-world data</kwd>
        <kwd>vaccines</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>In 2017, shoulder injury related to vaccine administration (SIRVA) was officially added to the vaccine injury table by the National Vaccine Injury Compensation Program (VICP) [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. The VICP defined SIRVA as shoulder pain and limited range of motion occurring after the administration of a vaccine intended for intramuscular administration in the upper arm. SIRVA is caused by an injury to the musculoskeletal structures of the shoulder (eg, tendons, ligaments, bursae). In 2019, the number of claims related to SIRVA rose to 55% of all claims received by VICP, which resulted in a payout of more than $200 million [<xref ref-type="bibr" rid="ref4">4</xref>]. Meanwhile, there has been increasing debate on whether vaccination or vaccine can cause shoulder problems [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>The debate is fueled by the lack of high-quality evidence from population-based studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Most SIRVA publications have been limited to case reports [<xref ref-type="bibr" rid="ref12">12</xref>]. Based on reports filed in the Vaccine Adverse Event Reporting System (VAERS), one recent study examined cases of shoulder problems following influenza vaccine administration [<xref ref-type="bibr" rid="ref13">13</xref>]. While VAERS data rely on spontaneous reporting and can be used for safety signal detection, comprehensive electronic medical record (EMR) data from integrated health care settings are better suited to calculate incidence rates, assess risk factors, or make causal inferences. One recent population-based study that used EMR data only examined one type of shoulder condition (subdeltoid bursitis) and one type of vaccination (influenza vaccine) [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>Although EMR data provide unprecedented opportunities for research, much EMR data are stored as free text. Researchers frequently use manual chart review of medical records to acquire information that is not available from structured data in the EMR system. Because there are no defined diagnosis codes for SIRVA, SIRVA case identification and determination must be done by reviewing free-text clinical documents. Manual review is both costly and time consuming; this challenge is magnified with SIRVA. Because SIRVA occurs rarely, but shoulder problems are one of the most common musculoskeletal conditions, detecting SIRVA cases necessitates chart review of a significant number of medical records [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Compared with manual chart review of medical records, natural language processing (NLP) is more efficient and produces more consistent results [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. For clinical research, NLP facilitates the identification and extraction of information unavailable or incomplete in structured data [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. In vaccine safety studies, we have used NLP to identify 2 vaccine-related adverse events, anaphylaxis and local reaction [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Therefore, NLP has the potential to enable population-based SIRVA studies using EMR data.</p>
      <p>Our objective was to develop an efficient SIRVA case-finding strategy using an NLP algorithm. We aimed to create and evaluate NLP components required for case identification, such as anatomic location, temporality, and causation. Furthermore, we sought to validate the SIRVA algorithm in a large, diverse vaccinated population.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Setting</title>
        <p>This study was conducted at Kaiser Permanente Southern California (KPSC), an integrated health care system that provides prepaid comprehensive health care to more than 4.7 million racially, ethnically, and socioeconomically diverse members [<xref ref-type="bibr" rid="ref22">22</xref>]. KPSC’s EMR system stores medical information about sociodemographics, utilization, diagnoses, laboratory tests, pharmacy use, membership history, and vaccination. This study was performed using structured data and free-text clinical notes from the EMR.</p>
      </sec>
      <sec>
        <title>Vaccinated Population With Presumptive Shoulder Injury</title>
        <p>The study was conducted among KPSC members aged 3 years or older who had at least 1 intramuscular vaccine administered in the arm between April 1, 2016, and December 31, 2017, within a KPSC facility (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Each vaccination was specified by the members’ unique identifier, the vaccination date (index date: ie, day 0), and the laterality of vaccination. Membership was required for 180 days before and after the index date.</p>
        <p>Among the vaccinated population described above, we identified members with a presumptive shoulder injury using <italic>International Classification of Diseases, 10th Revision, Clinical Modification</italic> (ICD-10-CM) codes (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) within 180 days after the index date; the laterality of the shoulder injury code had to match that of the vaccination. We excluded vaccinations if the members had a shoulder-related visit or had a shoulder injury code within 180 days before the index date.</p>
        <p>On day 0, members could have had clinical visits with preexisting shoulder conditions and subsequently receive vaccinations. To exclude these day 0 preexisting conditions, we required at least 2 encounters on day 0, of which at least 1 of the latter encounters had to be an urgent care, emergency department, or virtual visit (email, telephone, or video encounter). We sorted day 0 encounters by their timestamps. Day 0 encounters were excluded if the first encounter on day 0 had a shoulder injury code or if the encounter occurred before vaccination. In order to exclude vaccine-related local reactions, one of the most common adverse events occurring shortly after vaccination, a shoulder injury code also needed to appear during days 31 to 180 postvaccination.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flowchart showing selection of eligible vaccinations with presumptive shoulder injuries, application of natural language processing algorithm, and shoulder injury related to vaccine administration (SIRVA) case confirmation results (index date is vaccination date). ICD: International Classification of Diseases, 10th Revision, Clinical Modification; NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i5e30426_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>SIRVA Case Definition</title>
        <p>The VICP’s SIRVA case definition was created for medicolegal purposes [<xref ref-type="bibr" rid="ref3">3</xref>]. To meet this case definition, a vaccine recipient must manifest all of the following: (1) pain and reduced range of motion are limited to the shoulder in which the intramuscular vaccine was administered, (2) pain occurs within 48 hours of vaccination, (3) no history of pain, inflammation, or dysfunction of the affected shoulder prior to vaccination that would explain the alleged condition, (4) no other condition or abnormality is present that would explain the patient’s symptoms, and (5) symptoms must last more than 6 months after vaccination [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <p>Based on the VICP SIRVA case definition and other publications [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], we created a SIRVA case definition suitable for a population-based study using EMR data. A valid SIRVA case needed to meet 5 criteria: (1) damage to the shoulder region occurred and was confirmed by signs and symptoms (ie, pain, limited range of motion, weakness, and stiffness) and clinical diagnosis, (2) shoulder injury occurred in the same arm in which a vaccine was injected, (3) shoulder injury started within 7 days after vaccination, (4) vaccination was a possible cause of the shoulder injury and no other known causes were associated with the shoulder injury, and (5) shoulder injury lasted more than 30 days postvaccination.</p>
      </sec>
      <sec>
        <title>Subpopulation for Training and Validation of NLP Algorithm</title>
        <p>To increase the likelihood of including true SIRVA cases in the data sets used for training and validating the NLP algorithm, we applied additional criteria to the presumptive cases to define a subpopulation (n=517; <xref rid="figure2" ref-type="fig">Figure 2</xref>): (1) exclusion of cases with an external shoulder injury (eg, accident) code within 180 days before and 180 days after vaccination, (2) exclusion of cases with a shoulder injury code on day 0, (3) requirement of a shoulder injury code during days 1 to 30, and (4) requirement of a shoulder injury code on at least 2 different dates during days 31 to 180. The criteria were based on characteristics of chart-confirmed SIRVA cases from a prior study [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flowchart to create data set for training and validation data sampling (group A: shoulder disorder diagnoses reported in shoulder injury related to vaccine administration [SIRVA] literature; group B: shoulder disorder diagnoses not previously reported in SIRVA literature; group C: shoulder symptom codes; group D: shoulder injury codes [ICD-10-CM chapter 19: Injury, poisoning and certain other consequences of external causes]). NLP: natural language processing; ICD-10: International Classification of Diseases, 10th Revision, Clinical Modification.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i5e30426_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Training Data Set</title>
          <p>From the subpopulation described above, we selected a random sample for chart review. The NLP algorithm was built and refined based on incremental releases of training data [<xref ref-type="bibr" rid="ref21">21</xref>]. In contrast to machine learning methods in which the model automatically updates its parameters based on training data, we manually created and updated the search queries based on training data. Once the NLP algorithm stabilized and achieved good performance, we stopped the training process. The final training data set had 164 cases.</p>
        </sec>
        <sec>
          <title>Validation Data Set</title>
          <p>From the remaining cases in the subpopulation (n=353), we randomly selected another 100 cases to form the validation data set. The chart review results were used to evaluate the performance of the final NLP algorithm.</p>
        </sec>
      </sec>
      <sec>
        <title>Manual Chart Review</title>
        <p>We created a chart review form based on the SIRVA case definition. Chart abstractors reviewed the medical records and recorded information on the abstraction form (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) using the REDCap (Research Electronic Data Capture) system [<xref ref-type="bibr" rid="ref25">25</xref>]. The abstraction form was derived from a previous study of subdeltoid bursitis after vaccination but was expanded to include other shoulder disorder diagnoses [<xref ref-type="bibr" rid="ref14">14</xref>]. The chart abstraction and adjudication processes were similar to those used in past vaccine safety studies [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. An ascertainment period of 180 days after vaccination was used for both NLP and chart abstraction, allowing members sufficient time to seek medical care [<xref ref-type="bibr" rid="ref14">14</xref>]. A second person reviewed each completed abstraction form for quality. A KPSC physician adjudicated the potential cases according to the SIRVA case definition for cases in which the chart reviewers had difficulty making a final assessment.</p>
      </sec>
      <sec>
        <title>NLP Terminology Development</title>
        <p>NLP terminologies were derived from various data sources, including the clinical notes of the study participants, VAERS reports [<xref ref-type="bibr" rid="ref26">26</xref>], ontologies (eg, Unified Medical Language System [<xref ref-type="bibr" rid="ref27">27</xref>]), semantic lexicons (eg, WordNet [<xref ref-type="bibr" rid="ref28">28</xref>]), and other online resources. We expanded the derived terminologies using various tools. We used Linguamatics I2E [<xref ref-type="bibr" rid="ref29">29</xref>] to identify term variations including misspellings, morphological variants, and synonyms through I2E’s synonym discovery capability. We used word-embedding methods (fastText [<xref ref-type="bibr" rid="ref30">30</xref>] and GloVe [<xref ref-type="bibr" rid="ref31">31</xref>]) to find related terms not necessarily limited to synonyms. For instance, NLTK and fastText (from the Gensim package [<xref ref-type="bibr" rid="ref32">32</xref>]) were used to train subword embedding models. Because our main interest was to identify rare terms to enrich our terminologies, we trained skip-gram models in fastText. The trained model was used to identify similar terms based on their contexts. For instance, the word “injury” has similar terms with various semantic meanings including accident, fall, laceration, overuse, trip, and sprain.</p>
      </sec>
      <sec>
        <title>NLP Indexing</title>
        <p>The preprocessing steps included section detection, sentence separation, and tokenization (that is, segmenting text into linguistic units such as words and punctuation). For each token, the indexing process added annotations for matched concepts and general linguistic entities (eg, lexical chunks like noun or verb phrases). Additional annotations captured linguistic variations such as wildcard, substring, spelling correction, and morphological variation.</p>
      </sec>
      <sec>
        <title>NLP Search</title>
        <p>We used a rule-based NLP algorithm for this study [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. The NLP algorithm was developed to search each indexed note at different levels: section (eg, “past medical history”), intrasentence, and cross-sentence. A distance-based relationship detection algorithm was applied to relate terms to other terms based on the number of words or sentences between them, thereby associating shoulder injury with information on vaccination site, temporality, or causality (<xref rid="figure3" ref-type="fig">Figure 3</xref>). The relationship detection algorithm also allowed for terms to be specified as ordered or nested (eg, an inner relation is an element of an outer relation). We used negation algorithms similar to pyConText/NegEx [<xref ref-type="bibr" rid="ref35">35</xref>] to identify negated, uncertain, and hypothetical statements. The relationship search identified 3 types of information associated with shoulder injury: anatomic, temporal, and causal.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Cross-sentence search query example. This query searches over a span of 4 sentences (4s in diagram) with a maximum number of 50 words (≤50w in diagram) in between query items. There are 2 nested relationship queries inside the outermost relationship search. The first query searches for shoulder conditions, and the second query searches for causality statement. We removed other contextual query items from diagram due to space limitations. w: word; s: sentence.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i5e30426_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The anatomic site relationship algorithm extracted the body location and laterality of the shoulder injury. For example, “left” and “arm” were identified as the laterality and body location of the shoulder injury, respectively, in the sentence “Patient has persistent pain in his left arm.”</p>
        <p>The temporal relationship algorithm used linguistic terms, such as prepositions, to extract temporal relationships such as the onset date and duration associated with the vaccination event (eg, “for 2 months,” “over the past 2 weeks,” “since last Thursday”). Incomplete temporal information was inferred based on the note creation date. For example, dates with missing year information in clinical notes were assumed to occur near the note creation date. Additional details about the types of temporal expressions extracted by the NLP algorithm are available in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <p>The causal relationship algorithm searched for possible causes of shoulder injury and classified them into 7 types (<xref ref-type="table" rid="table1">Table 1</xref>). The determination of causal relationships between cause and shoulder injury was made by lexical-syntactic rules based on more than 70 trigger terms (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). The terminologies for causes of shoulder injury other than vaccination are listed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. Moreover, for each relationship search, we also extracted the vaccine name if available because multiple vaccines could be administered concomitantly or during follow-up.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Types of causes associated with shoulder injuries.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="70"/>
            <col width="230"/>
            <col width="700"/>
            <thead>
              <tr valign="top">
                <td>Order</td>
                <td>Type of cause</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Vaccination</td>
                <td>Specific vaccine name or general vaccine terms</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Accident</td>
                <td>Accidents such as auto accident, fall, hit</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Work</td>
                <td>Work-related injury</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Other medical conditions</td>
                <td>Medical conditions that can cause shoulder injury such as arthritis or chest pain radiating to the shoulder</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Exercise</td>
                <td>Exercise or sports-related injury</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Daily activity</td>
                <td>Injuries that occurred during other daily activities such as lifting groceries, overuse, or side sleeping</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Unknown</td>
                <td>Insidious or unknown cause</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP Case Classification</title>
        <p>The final classification was based on the case definition described in the section “SIRVA case definition” by integrating vaccine, anatomic location, temporality, and causality information. Because our algorithm emphasized sensitivity, we captured additional probable and possible cases identified by NLP with weaker evidence as defined by the following 3 criteria. First, the vaccination cause was identified only by cross-sentence causal relationship search. For example, shoulder injury and vaccination were described in separate sentences: “Patient requesting an appointment for evaluation for left arm pain. States experiencing pain × 1 month s/p flu vaccine.” Second, vaccination was identified as a cause of shoulder injury 30 days or less after vaccination. Because causality was less likely to be documented when the visit date was further away from the onset date, vaccination may only be established as the cause of the shoulder injury within 30 days of vaccination, but not more than 30 days after vaccination. Third, the vaccine associated with shoulder injury documented in the clinical note did not match the vaccine recorded in the vaccination file. Positive cases that met the SIRVA case definition were further classified into 3 groups: definite if they met none of the 3 criteria; probable if they met only 1 of the 3 criteria; and possible if they met 2 or more of the 3 criteria.</p>
      </sec>
      <sec>
        <title>NLP Algorithm Performance</title>
        <p>We evaluated the NLP algorithm’s accuracy in identifying SIRVA cases compared to the chart review reference standard in the validation dataset. We calculated sensitivity, specificity, positive predictive value, and negative predictive value and their 95% confidence intervals. Since the NLP algorithm could potentially be accurate in determining a case not to be SIRVA but based on an incorrect assessment of an individual component of the SIRVA case definition not being met, we also conducted an error analysis of cases in which there were discrepancies between the NLP algorithm and chart review for individual components of the case definition.</p>
      </sec>
      <sec>
        <title>Application of NLP Algorithm to Study Population and Chart Confirmation</title>
        <p>The final NLP algorithm was applied to the broader study population of vaccinated persons with presumptive shoulder injury (based on codes) to identify potential SIRVA cases. We performed manual chart confirmation on all NLP-identified cases and calculated chart confirmation rates and their 95% confidence intervals.</p>
        <p>We assembled the final group of SIRVA cases based on the chart review results. We calculated the time between vaccination and the first visit for a shoulder disorder in these SIRVA cases. We also examined the vaccination-related temporal and causal statements in the clinical notes of these SIRVA cases.</p>
      </sec>
      <sec>
        <title>Ethical Approval</title>
        <p>The study was approved by the KPSC institutional review board (#4982), which waived the requirement for informed consent due to this being a data-only minimal risk study.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Application of NLP Algorithm to Study Population</title>
        <p>Out of 3,758,764 eligible vaccinations, we identified 77,819 records with a shoulder injury code (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Among them, 16,048 had a code on day 0. After applying the day 0 inclusion criteria, the number of day 0 records remaining was 100. The NLP algorithm was applied to 53,585 cases with presumptive shoulder injury after vaccination.</p>
      </sec>
      <sec>
        <title>Validation Results</title>
        <p>The NLP algorithm achieved perfect accuracy (100%) in identifying the 4 SIRVA cases from the validation dataset (n=100). However, the small number of positive cases resulted in wide confidence intervals for sensitivity and positive predictive value (39.6%-100.0%). Meanwhile, the confidence intervals for specificity and negative predictive value remained narrow (95.2%-100.0%).</p>
        <p>Discrepancies between the NLP algorithm and chart review were investigated by component (<xref ref-type="table" rid="table2">Table 2</xref>). For laterality, discrepancies were typically due to conflicting evidence or documentation errors in the clinical notes themselves. For temporality, the NLP algorithm incorrectly assigned symptom onset when performing cross-sentence searches and incorrectly assigned injury duration based on incorrect laterality or capture of a resolved shoulder injury.</p>
        <p>For causality, the NLP algorithm missed causes such as daily activity and accident and incorrectly identified the cause as unknown. These mistakes, however, had no bearing on the causality classification of whether or not they were vaccine-related. Furthermore, because a confirmed case must meet all of the elements of the case definition, inaccuracy in 1 element may not affect the overall accuracy of the SIRVA case classification.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Error analyses on the validation dataset.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="50"/>
            <col width="920"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Clinical text examples and the causes of Natural Language Processing (NLP) errors</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Error analysis on injury onset</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>“She has chronic pain—neck, low back, B/L<sup>a</sup> shoulders. She has fibromyalgia and also fell a few weeks ago which worsened her back pain.”<break/>NLP incorrectly associated the event (“fall”) that occurred “a few weeks ago” with the shoulder problem when performing a cross-sentence search.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>Prior condition reported on day 0 visit: “My left shoulder pain never went away despite still doing physical therapy and living on NSAIDs<sup>b</sup>. Now it is constant and much worse today.”<break/>NLP incorrectly captured “today” as the shoulder pain onset date when performing a cross-sentence search.</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Error analysis on injury duration</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>On day 136, “States in past pain would travel to left shoulder causing numbness to left arm and lasting a few days but today denies any numbness.”<break/>NLP incorrectly identified the injury duration based on a resolved shoulder symptom.</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Error analysis on injury cause</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>“...with 1 day of pain in the left arm and shoulder. Denies any injury. Did some lifting yesterday.”<break/>NLP identified the cause as unknown, failing to identify the possible cause (daily activity).</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>5</td>
                <td>“She has been working on the computer a lot. Overhead movement exacerbates the pain... No injury or trauma.”<break/>NLP identified the cause as unknown, failing to identify the possible cause (daily activity).</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>6</td>
                <td>“...who complains of left shoulder pain that started 3 weeks ago after vacuuming.”<break/>NLP identified the cause as unknown, failing to identify the possible cause (daily activity).</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>7</td>
                <td>“...likely subdeltoid bursitis and supraspinatus tendinopathy in the setting of DM<sup>c</sup> likely from acute movement with pain when getting IV<sup>d</sup> placed.”<break/>NLP identified the cause as unknown, failing to identify the possible cause (accident).</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>8</td>
                <td>“Patient reports left shoulder pain with movement; no trauma. Patient worked for years caring for young children and had to carry and lift them.”<break/>NLP identified the cause as unknown, failing to identify the possible cause (daily activity).</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>B/L: bilateral.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>NSAIDs: nonsteroidal anti-inflammatory drugs.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>DM: diabetes mellitus.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>IV: intravenous.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP-Identified Potential SIRVA Cases</title>
        <p>We applied the final NLP algorithm to the clinical notes of 53,585 presumptive shoulder injury cases. Among them, 99.9% (53,530/53,585) had at least 1 clinical note on days 0 to 180 after vaccination. The total number of clinical notes searched by NLP was 4,292,610. The average number of clinical notes per case was 80. The index size was around 50 gigabytes. The NLP algorithm identified shoulder injury in 46,086 records, and 96.5% of them had matched laterality compared to the vaccination files (<xref ref-type="table" rid="table3">Table 3</xref>). The NLP algorithm identified at least 1 cause for 55.0% (25,325/46,086) of the NLP-identified shoulder injury cases. The temporal relation search identified the onset date for 98.2% (45,252/46,086) of the NLP-identified shoulder injury cases. About 76.2% (35,135/46,086) of these NLP-identified shoulder injury cases had symptom duration of more than 30 days postvaccination. The number of potential SIRVA cases identified by the NLP algorithm was 467, classified into 291 definite, 124 probable, and 52 possible SIRVA cases.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Number of cases identified by natural language processing (NLP) in the base study population (n=53,585).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="520"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="bottom">
                <td colspan="3">Natural language processing–identified cases</td>
                <td colspan="2">n</td>
                <td colspan="2">%<sup>a</sup> (n=53,585)</td>
                <td>%<sup>b</sup> (n=46,086)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Shoulder injury identified</td>
                <td colspan="2">46,086</td>
                <td colspan="2">86</td>
                <td>—<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Anatomic site</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Laterality identified</td>
                <td colspan="2">44,488</td>
                <td colspan="2">83</td>
                <td colspan="2">96.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Laterality mismatch</td>
                <td colspan="2">1220</td>
                <td colspan="2">2.3</td>
                <td colspan="2">2.6</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Causality</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cause identified<sup>d</sup></td>
                <td colspan="2">25,325</td>
                <td colspan="2">47.3</td>
                <td colspan="2">55.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cause identified<sup>e</sup></td>
                <td colspan="2">19,039</td>
                <td colspan="2">35.5</td>
                <td colspan="2">41.3</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Temporality</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Onset identified</td>
                <td colspan="2">45,252</td>
                <td colspan="2">84.4</td>
                <td colspan="2">98.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Symptom duration &gt;30 days postvaccination</td>
                <td colspan="2">35,135</td>
                <td colspan="2">65.6</td>
                <td colspan="2">76.2</td>
              </tr>
              <tr valign="top">
                <td colspan="3">SIRVA<sup>f</sup> cases</td>
                <td colspan="2">467</td>
                <td colspan="2">0.9</td>
                <td>1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Percentage of cases among the number of cases with shoulder injury diagnosis code (n=53,585).</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Percentage of cases among the number of natural language processing–identified shoulder injury cases (n=46,086).</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Not applicable.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>Includes unknown cause stated in the clinical notes.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>Excludes unknown cause stated in the clinical notes.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>SIRVA: shoulder injury related to vaccine administration.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Final SIRVA Cases After Chart Review</title>
        <p>We performed chart review on 467 NLP-identified SIRVA cases (<xref ref-type="table" rid="table4">Table 4</xref>). The chart confirmation rates were 95.5% (95% CI 92.5%-97.4%), 67.7% (95% CI 59.1%-75.3%), and 17.3% (95% CI 8.7%-30.8%) for the definite, probable, and possible groups, respectively. The final number of SIRVA cases was 371.</p>
        <p>Among these 371 cases, the median times from vaccination to the first and last visit with a shoulder injury code were 43 days (IQR 21-79 days, range 0-180 days) and 127 days (IQR 77-162, range 31-180 days), respectively. The symptom onset occurred 2 or fewer days after vaccination in 93.5% (347/371) of cases and from 3 to 7 days after vaccination in 6.5% (24/371) of cases. Most cases (355/371, 95.7%) had explicit temporal statements on symptom onset in relation to vaccination. Examples included “L shoulder pain that started the day she got a flu shot” and “Right shoulder pain and neck stiffness since immunizations.” The symptom onset for the remaining cases (16/371, 4.3%) could be derived based on the date of clinical visit, symptom duration, and causality statement (eg, “Reports having R shoulder pain for last 2 months. Thought related to vaccine she received in R arm”). In 145 cases, there were explicit causal statements regarding the shoulder condition and the vaccination (eg, “status post vaccination—suspect rotator cuff irritation from vaccination itself”). Of those, 40 cases had mention of incorrect vaccine administration.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Number of natural language processing–identified cases and chart-confirmed cases.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="500"/>
            <col width="0"/>
            <col width="150"/>
            <col width="150"/>
            <col width="0"/>
            <col width="170"/>
            <thead>
              <tr valign="bottom">
                <td colspan="3">NLP<sup>a</sup>-identified group</td>
                <td>NLP-identified</td>
                <td colspan="2">Chart confirmed</td>
                <td>Confirmation rate (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Definite</td>
                <td>291</td>
                <td colspan="2">278</td>
                <td>95.5</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Probable</bold>
                </td>
                <td>124</td>
                <td colspan="2">84</td>
                <td>67.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cross-sentence causality</td>
                <td colspan="2">64</td>
                <td>46</td>
                <td colspan="2">71.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Vaccination cause identified ≤30 days after vaccination</td>
                <td colspan="2">41</td>
                <td>26</td>
                <td colspan="2">63.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Vaccine mismatch</td>
                <td colspan="2">19</td>
                <td>12</td>
                <td colspan="2">63.2</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Possible</td>
                <td>52</td>
                <td colspan="2">9</td>
                <td>17.3</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Total</td>
                <td>467</td>
                <td colspan="2">371</td>
                <td>79.4</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>SIRVA is a rare outcome after vaccination that does not have a specific diagnosis code, and it is impractical to conduct manual chart review to identify all SIRVA cases. We developed and validated an NLP algorithm to identify potential SIRVA cases with high accuracy. The only previous population-based study on SIRVA [<xref ref-type="bibr" rid="ref14">14</xref>] was limited to shoulder bursitis after influenza vaccination. In that study, a random sample of 526 out of 1098 presumptive cases was chart reviewed to identify 12 subdeltoid bursitis cases attributed to vaccination. In this study, we included cases with all types of shoulder disorder diagnoses after vaccinations. Out of 53,585 presumptive cases, the NLP algorithm combined with manual chart review yielded 371 SIRVA cases. Among 3.8 million vaccinations, the rate of SIRVA in this study was around 1 per 10,000 vaccinations [<xref ref-type="bibr" rid="ref12">12</xref>]. It should be noted that our SIRVA case definition was different from that of the VICP and other studies in terms of symptom onset, duration, and severity.</p>
        <p>Although the NLP algorithm’s overall accuracy was high, some challenges remained with the laterality component, despite the addition of laterality information in ICD-10-CM coding. First, descriptions of symptom location may not be precise. For example, the arm could refer to the region from the shoulder joint to the elbow joint (upper arm) or further down to the wrist. Second, the laterality recorded in the vaccine file or documented in the clinical notes could be incorrect. These issues must be considered when conducting studies using anatomic and laterality information.</p>
        <p>There were several lessons learned from the temporality component of the NLP algorithm. First, there could be documentation of multiple onset dates during the 180 days after vaccination. Second, the disease onset information was more likely to be incomplete or inaccurate when the onset date was in the distant past, which could make it difficult to determine the onset date if the clinical visit date was further away from the vaccination date. In this study, to maximize sensitivity, any potential case with an onset falling within the predefined onset window satisfied the onset criteria.</p>
        <p>In our study, the causality component worked reasonably well in identifying vaccination-related causality statements. Although the provider or patient may have stated that the shoulder injury was vaccination-related, such statements do not provide definitive proof of causality. Because shoulder symptoms could have an insidious onset with multiple contributing factors, it was difficult to draw definitive conclusions about cause and effect. To improve specificity, we excluded cases with nonvaccination causes of shoulder injury. However, it was still challenging to identify nonvaccination causes. First, there were numerous causes of shoulder injuries. Second, some of the causes could also be the treatment for the shoulder problem. For example, exercise could be both the cause and the therapy plan for shoulder injuries. Third, the cause of shoulder injury was often not mentioned in the clinical notes. In this study, the NLP algorithm could not identify the cause in about half of the cases. Last, the cause of shoulder injury was often not described in the same sentence as the shoulder symptom. The cross-sentence relationship search increased the sensitivity but decreased specificity. Causal relations have been studied extensively in the NLP field [<xref ref-type="bibr" rid="ref36">36</xref>], but only a few studies focused on health-related causal relations and were conducted using Twitter messages [<xref ref-type="bibr" rid="ref37">37</xref>] and literature [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. One study extracted causal relations from clinical text using 3 causal key phrases (because, due to, and secondary to) and discontinuation key phrases to detect adverse drug reactions in ambulatory notes and achieved high specificity (98%) but low sensitivity (31%) and positive predictive value (45%) [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
        <p>SIRVA-related shoulder symptoms are common for other acute or chronic medical conditions with many possible causes. Correctly integrating the NLP-identified laterality, temporality, and causality information is nontrivial. For the same patient, different clinical encounters could attribute the shoulder injury to different causes. In this study, we made patient-level classifications by using the information identified from all the components from all the notes. The combination of information across multiple notes increased the sensitivity of finding SIRVA cases but reduced the specificity since the NLP algorithm could misinterpret unrelated information extracted from multiple notes.</p>
        <p>Because we tailored the NLP algorithm to emphasize sensitivity, the confirmation rates were low in the probable (67.7%) and possible (17.3%) groups. However, since SIRVA is a rare event, manual review of all the probable and possible cases was feasible in this study. In future studies, instead of categorizing the NLP output based on the strength of evidence, a machine learning model could be built on top of the NLP outputs [<xref ref-type="bibr" rid="ref15">15</xref>] to further improve accuracy and develop thresholds. The SIRVA cases identified in this study could also serve as training data for a machine learning algorithm.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study had some potential limitations. We were unable to apply the algorithm to all the eligible vaccinations (n=3,758,764) due to time and resource restrictions. Our study population was limited to vaccinated cases with a diagnosis code for shoulder injury. However, loss of sensitivity is expected to be minimal since we used a comprehensive list of codes. Additionally, shoulder injuries can last a long time and are often accompanied by repeated visits. The 6-month lookback window used in this study may not have been sufficient to remove preexisting shoulder conditions. Failure to exclude prior shoulder conditions could reduce the specificity of the NLP algorithm. In our vaccine-related local reaction study [<xref ref-type="bibr" rid="ref20">20</xref>], most people diagnosed with a presumptive code of interest on day 0 had symptom onset before vaccination. In this study, we excluded most cases with a shoulder injury code on day 0. Further research is needed to study the association between SIRVA and day 0 shoulder injury codes. Finally, because our method was tailored to this specific outcome after vaccination, its generalizability for use with other outcomes is unclear.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We developed and validated an NLP algorithm to identify potential SIRVA cases among vaccinated persons with presumptive shoulder injury. The algorithm achieved high sensitivity and reasonable specificity. The NLP algorithm can potentially be used in future population-based studies to identify this rare adverse event, avoiding labor-intensive chart review validation.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>International Classification of Diseases, 10th Revision, Clinical Modification code groups for identifying presumptive shoulder injury cases.</p>
        <media xlink:href="publichealth_v8i5e30426_app1.docx" xlink:title="DOCX File , 61 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Chart review abstraction form.</p>
        <media xlink:href="publichealth_v8i5e30426_app2.docx" xlink:title="DOCX File , 1106 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Sample extracted temporal expressions.</p>
        <media xlink:href="publichealth_v8i5e30426_app3.docx" xlink:title="DOCX File , 21 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Trigger phrases for identifying causal relationships.</p>
        <media xlink:href="publichealth_v8i5e30426_app4.docx" xlink:title="DOCX File , 25 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Terminology for causes of shoulder injury other than vaccination.</p>
        <media xlink:href="publichealth_v8i5e30426_app5.docx" xlink:title="DOCX File , 22 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">KPSC</term>
          <def>
            <p>Kaiser Permanente Southern California</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICD-10-CM</term>
          <def>
            <p>International Classification of Diseases, 10th Revision, Clinical Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">REDCap</term>
          <def>
            <p>Research Electronic Data Capture</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SIRVA</term>
          <def>
            <p>shoulder injury related to vaccine administration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">VAERS</term>
          <def>
            <p>Vaccine Adverse Event Reporting System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">VICP</term>
          <def>
            <p>National Vaccine Injury Compensation Program</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We want to thank the following persons for their contributions to data collection and medical record abstraction: Anna Lawless, Bernadine Dizon, Claire Park, Jose Pio, Joy Gelfond, Karen Schenk, Kerresa Morrissette, Melena Taylor, Nancy Canul-Jauriga, and Radha Bathala. This study was funded under contract 200-2012-53580 through the Vaccine Safety Datalink from the Centers for Disease Control and Prevention (CDC). The findings and conclusions in this report are those of the authors and do not necessarily represent the official position of the CDC.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>CZ, JD, LSS, CM, and SJJ contributed to conceptualizing the study and obtained the funding. CZ, I-LAL, SSK, and DSR collected and analyzed the data. CZ designed and implemented the natural language processing algorithm. CZ drafted the initial manuscript. CZ, JD, LSS, WC, and RAN provided critical revision of the article for important intellectual content. All authors reviewed and approved the final manuscript for publication.</p>
      </fn>
      <fn fn-type="conflict">
        <p>LSS has received research support from GlaxoSmithKline, Dynavax, Seqirus, and Moderna for studies unrelated to this paper. LQ has received research support from GlaxoSmithKline, Moderna, and Dynavax for studies unrelated to this paper. All other authors report no conflicts of interest related to the submitted work.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Atanasoff</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lightfoot</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Johann-Liang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Shoulder injury related to vaccine administration (SIRVA)</article-title>
          <source>Vaccine</source>
          <year>2010</year>
          <month>11</month>
          <day>29</day>
          <volume>28</volume>
          <issue>51</issue>
          <fpage>8049</fpage>
          <lpage>8052</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2010.10.005</pub-id>
          <pub-id pub-id-type="medline">20955829</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(10)01465-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine</collab>
          </person-group>
          <source>Adverse Effects of Vaccines: Evidence and Causality</source>
          <year>2012</year>
          <publisher-loc>Washington</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <article-title>Vaccine Injury Table</article-title>
          <source>Health Resources and Services Administration</source>
          <access-date>2020-10-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hrsa.gov/sites/default/files/vaccinecompensation/vaccineinjurytable.pdf">https://www.hrsa.gov/sites/default/files/vaccinecompensation/vaccineinjurytable.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>National vaccine injury compensation program data report</article-title>
          <source>Health Resources and Services Administration</source>
          <year>2020</year>
          <access-date>2020-10-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hrsa.gov/sites/default/files/hrsa/vaccine-compensation/data/vicp-stats-05-01-2022.pdf">https://www.hrsa.gov/sites/default/files/hrsa/vaccine-compensation/data/vicp-stats-05-01-2022.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>American Academy of Orthopaedic Surgeons</collab>
          </person-group>
          <article-title>Position statement 1190</article-title>
          <source>Rotator cuff tendinopathy, adhesive capsulitis, and arthritis cannot be caused by vaccine administration</source>
          <year>2019</year>
          <month>06</month>
          <access-date>2021-06-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aaos.org/contentassets/1cd7f41417ec4dd4b5c4c48532183b96/1190-rotator-cuff-tendinopathy-adhesive-capsulitis-and-arthritis-can-not-be-caused-by-vaccine-administration.pdf">https://tinyurl.com/2sfs24kw</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <article-title>National Vaccine Injury Compensation Program: revisions to the vaccine injury table</article-title>
          <source>U.S. Department of Health and Human Services</source>
          <year>2020</year>
          <month>07</month>
          <day>20</day>
          <access-date>2021-06-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.govinfo.gov/content/pkg/FR-2020-07-20/pdf/2020-15673.pdf">https://www.govinfo.gov/content/pkg/FR-2020-07-20/pdf/2020-15673.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>AI</given-names>
            </name>
            <name name-style="western">
              <surname>Kortlever</surname>
              <given-names>JTP</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Ring</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>Influenza vaccination is not associated with increased number of visits for shoulder pain</article-title>
          <source>Clin Orthop Relat Res</source>
          <year>2020</year>
          <month>10</month>
          <volume>478</volume>
          <issue>10</issue>
          <fpage>2343</fpage>
          <lpage>2348</lpage>
          <pub-id pub-id-type="doi">10.1097/CORR.0000000000001215</pub-id>
          <pub-id pub-id-type="medline">32141910</pub-id>
          <pub-id pub-id-type="pii">00003086-202010000-00024</pub-id>
          <pub-id pub-id-type="pmcid">PMC7491880</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martín Arias</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Sanz Fadrique</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sáinz Gil</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Salgueiro-Vazquez</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Risk of bursitis and other injuries and dysfunctions of the shoulder following vaccinations</article-title>
          <source>Vaccine</source>
          <year>2017</year>
          <month>09</month>
          <day>05</day>
          <volume>35</volume>
          <issue>37</issue>
          <fpage>4870</fpage>
          <lpage>4876</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2017.07.055</pub-id>
          <pub-id pub-id-type="medline">28774564</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(17)30963-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodor</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montalvo</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Vaccination-related shoulder dysfunction</article-title>
          <source>Vaccine</source>
          <year>2007</year>
          <month>01</month>
          <day>08</day>
          <volume>25</volume>
          <issue>4</issue>
          <fpage>585</fpage>
          <lpage>587</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2006.08.034</pub-id>
          <pub-id pub-id-type="medline">17064824</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(06)00990-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salmon</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Geoffroy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eschard</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Ohl</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Bone erosion and subacromial bursitis caused by diphtheria-tetanus-poliomyelitis vaccine</article-title>
          <source>Vaccine</source>
          <year>2015</year>
          <month>11</month>
          <day>17</day>
          <volume>33</volume>
          <issue>46</issue>
          <fpage>6152</fpage>
          <lpage>6155</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2015.09.090</pub-id>
          <pub-id pub-id-type="medline">26458794</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(15)01380-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Erickson</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>DiCarlo</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Brause</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hannafin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Lytic lesion in the proximal humerus after a flu shot: a case report</article-title>
          <source>JBJS Case Connect</source>
          <year>2019</year>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e0248</fpage>
          <pub-id pub-id-type="doi">10.2106/JBJS.CC.18.00248</pub-id>
          <pub-id pub-id-type="medline">31274645</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Duffy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>IA</given-names>
            </name>
            <name name-style="western">
              <surname>Sy</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Navarro</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Mercado</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Risk for Shoulder Conditions After Vaccination: A Population-Based Study Using Real-World Data</article-title>
          <source>Ann Intern Med</source>
          <year>2022</year>
          <month>05</month>
          <volume>175</volume>
          <issue>5</issue>
          <fpage>634</fpage>
          <lpage>643</lpage>
          <pub-id pub-id-type="doi">10.7326/M21-3023</pub-id>
          <pub-id pub-id-type="medline">35313110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hibbs</surname>
              <given-names>BF</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Museru</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Moro</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Marquez</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cano</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Shimabukuro</surname>
              <given-names>TT</given-names>
            </name>
          </person-group>
          <article-title>Reports of atypical shoulder pain and dysfunction following inactivated influenza vaccine, Vaccine Adverse Event Reporting System (VAERS), 2010-2017</article-title>
          <source>Vaccine</source>
          <year>2020</year>
          <month>01</month>
          <day>29</day>
          <volume>38</volume>
          <issue>5</issue>
          <fpage>1137</fpage>
          <lpage>1143</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2019.11.023</pub-id>
          <pub-id pub-id-type="medline">31784231</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(19)31546-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Navarro</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Daley</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Getahun</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Henninger</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Nordin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Zerbo</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Duffy</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Risk for subdeltoid bursitis after influenza vaccination: a population-based cohort study</article-title>
          <source>Ann Intern Med</source>
          <year>2020</year>
          <month>08</month>
          <day>18</day>
          <volume>173</volume>
          <issue>4</issue>
          <fpage>253</fpage>
          <lpage>261</lpage>
          <pub-id pub-id-type="doi">10.7326/M19-3176</pub-id>
          <pub-id pub-id-type="medline">32568572</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rashid</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Koblick</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Cheetham</surname>
              <given-names>TC</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing and machine learning to identify gout flares from electronic clinical notes</article-title>
          <source>Arthritis Care Res (Hoboken)</source>
          <year>2014</year>
          <month>11</month>
          <day>24</day>
          <volume>66</volume>
          <issue>11</issue>
          <fpage>1740</fpage>
          <lpage>1748</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/acr.22324"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/acr.22324</pub-id>
          <pub-id pub-id-type="medline">24664671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing: an introduction</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <month>10</month>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>544</fpage>
          <lpage>551</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&amp;pmid=21846786"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000464</pub-id>
          <pub-id pub-id-type="medline">21846786</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000464</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168328</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Methods and dimensions of electronic health record data quality assessment: enabling reuse for clinical research</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>01</month>
          <day>01</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>144</fpage>
          <lpage>151</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=22733976"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000681</pub-id>
          <pub-id pub-id-type="medline">22733976</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000681</pub-id>
          <pub-id pub-id-type="pmcid">PMC3555312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shivade</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fosler-Lussier</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Embi</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>A review of approaches to identifying patient phenotype cohorts using electronic health records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <month>03</month>
          <day>01</day>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>221</fpage>
          <lpage>230</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24201027"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001935</pub-id>
          <pub-id pub-id-type="medline">24201027</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001935</pub-id>
          <pub-id pub-id-type="pmcid">PMC3932460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Biomedical Natural Language Processing</source>
          <year>2014</year>
          <publisher-loc>Amsterdam</publisher-loc>
          <publisher-name>John Benjamins Publishing Company</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Mercado</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sy</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Glenn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>HF</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duffy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McNeil</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Daley</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Crane</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>McLean</surname>
              <given-names>HQ</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>The use of natural language processing to identify vaccine-related anaphylaxis at five health care systems in the Vaccine Safety Datalink</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2020</year>
          <month>02</month>
          <volume>29</volume>
          <issue>2</issue>
          <fpage>182</fpage>
          <lpage>188</lpage>
          <pub-id pub-id-type="doi">10.1002/pds.4919</pub-id>
          <pub-id pub-id-type="medline">31797475</pub-id>
          <pub-id pub-id-type="pmcid">PMC7528887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Mercado</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sy</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Glenn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>HF</given-names>
            </name>
            <name name-style="western">
              <surname>Duffy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Daley</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Crane</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>McLean</surname>
              <given-names>HQ</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>The use of natural language processing to identify Tdap-related local reactions at five health care systems in the Vaccine Safety Datalink</article-title>
          <source>Int J Med Inform</source>
          <year>2019</year>
          <month>07</month>
          <volume>127</volume>
          <fpage>27</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31128829"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.04.009</pub-id>
          <pub-id pub-id-type="medline">31128829</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(18)31029-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6645678</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koebnick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Langer-Gould</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gould</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Iyer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Sociodemographic characteristics of members of a large, integrated health care system: comparison with US Census Bureau data</article-title>
          <source>Perm J</source>
          <year>2012</year>
          <month>08</month>
          <day>01</day>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>37</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23012597"/>
          </comment>
          <pub-id pub-id-type="doi">10.7812/tpp/12-031</pub-id>
          <pub-id pub-id-type="medline">23012597</pub-id>
          <pub-id pub-id-type="pmcid">PMC3442759</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Health Resources and Services Administration</collab>
          </person-group>
          <source>What You Need to Know About the National Vaccine Injury Compensation Program (VICP)</source>
          <year>2019</year>
          <month>04</month>
          <access-date>2021-03-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hrsa.gov/sites/default/files/hrsa/vaccine-compensation/resources/about-vaccine-injury-compensation-program-booklet.pdf">https://www.hrsa.gov/sites/default/files/hrsa/vaccine-compensation/resources/about-vaccine-injury-compensation-program-booklet.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Atanasoff</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hibbs</surname>
              <given-names>BF</given-names>
            </name>
            <name name-style="western">
              <surname>Adegoke</surname>
              <given-names>OJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Marquez</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Osborn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Moro</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Shimabukuro</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Shoulder Injury Related to Vaccine Administration (SIRVA): petitioner claims to the National Vaccine Injury Compensation Program, 2010-2016</article-title>
          <source>Vaccine</source>
          <year>2020</year>
          <month>01</month>
          <day>29</day>
          <volume>38</volume>
          <issue>5</issue>
          <fpage>1076</fpage>
          <lpage>1083</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2019.11.032</pub-id>
          <pub-id pub-id-type="medline">31771864</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(19)31557-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Thielke</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Conde</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Research electronic data capture (REDCap)—a metadata-driven methodology and workflow process for providing translational research informatics support</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>04</month>
          <volume>42</volume>
          <issue>2</issue>
          <fpage>377</fpage>
          <lpage>381</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(08)00122-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2008.08.010</pub-id>
          <pub-id pub-id-type="medline">18929686</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(08)00122-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC2700030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shimabukuro</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>DeStefano</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Safety monitoring in the Vaccine Adverse Event Reporting System (VAERS)</article-title>
          <source>Vaccine</source>
          <year>2015</year>
          <month>08</month>
          <day>26</day>
          <volume>33</volume>
          <issue>36</issue>
          <fpage>4398</fpage>
          <lpage>4405</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26209838"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2015.07.035</pub-id>
          <pub-id pub-id-type="medline">26209838</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(15)00982-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4632204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>D270</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://nar.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>WordNet: a lexical database for English</article-title>
          <source>Commun ACM</source>
          <year>1995</year>
          <month>11</month>
          <volume>38</volume>
          <issue>11</issue>
          <fpage>39</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1145/219717.219748</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cormack</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nath</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Milward</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Raja</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnalagadda</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>Agile text mining for the 2014 i2b2/UTHealth Cardiac risk factors challenge</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58 Suppl</volume>
          <fpage>S120</fpage>
          <lpage>S127</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00141-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.06.030</pub-id>
          <pub-id pub-id-type="medline">26209007</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00141-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC4737484</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Enriching word vectors with subword information</article-title>
          <source>Trans Assoc Comput Linguist</source>
          <year>2017</year>
          <month>12</month>
          <volume>5</volume>
          <fpage>135</fpage>
          <lpage>146</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: global vectors for word representation</article-title>
          <year>2014</year>
          <conf-name>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Doha</conf-loc>
          <fpage>1532</fpage>
          <lpage>1543</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D14-1162"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rehurek</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sojka</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Software framework for topic modeling with large corpora</article-title>
          <year>2010</year>
          <conf-name>Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks</conf-name>
          <conf-date>2010</conf-date>
          <conf-loc>Valletta</conf-loc>
          <fpage>45</fpage>
          <lpage>50</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mercado</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ackerson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lewin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>HF</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing for identification of herpes zoster ophthalmicus cases to support population-based study</article-title>
          <source>Clin Exp Ophthalmol</source>
          <year>2019</year>
          <month>01</month>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>7</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1111/ceo.13340</pub-id>
          <pub-id pub-id-type="medline">29920898</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Redberg</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Ferencik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Natsui</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawatkar</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Musigdilok</surname>
              <given-names>VV</given-names>
            </name>
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Automated identification and extraction of exercise treadmill test results</article-title>
          <source>J Am Heart Assoc</source>
          <year>2020</year>
          <month>03</month>
          <day>03</day>
          <volume>9</volume>
          <issue>5</issue>
          <fpage>e014940</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ahajournals.org/doi/10.1161/JAHA.119.014940?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/JAHA.119.014940</pub-id>
          <pub-id pub-id-type="medline">32079480</pub-id>
          <pub-id pub-id-type="pmcid">PMC7335560</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>Document-level classification of CT pulmonary angiography reports based on an extension of the ConText algorithm</article-title>
          <source>J Biomed Inform</source>
          <year>2011</year>
          <month>10</month>
          <volume>44</volume>
          <issue>5</issue>
          <fpage>728</fpage>
          <lpage>737</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(11)00062-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2011.03.011</pub-id>
          <pub-id pub-id-type="medline">21459155</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(11)00062-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC3164892</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Asghar</surname>
              <given-names>Nabiha</given-names>
            </name>
          </person-group>
          <article-title>Automatic extraction of causal relations from natural language texts: a comprehensive survey</article-title>
          <source>arXiv preprint arXiv:1605.07895</source>
          <year>2016</year>
          <access-date>2022-05-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1605.07895">https://arxiv.org/abs/1605.07895</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Tilak</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Zisook</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Torii</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Extracting health-related causality from Twitter messages using natural language processing</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>04</month>
          <day>04</day>
          <volume>19</volume>
          <issue>Suppl 3</issue>
          <fpage>79</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0785-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0785-0</pub-id>
          <pub-id pub-id-type="medline">30943954</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0785-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6448183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Extracting causal knowledge from a medical database using graphical patterns</article-title>
          <year>2000</year>
          <conf-name>Proceedings of the 38th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>2000</conf-date>
          <conf-loc>Hong Kong</conf-loc>
          <pub-id pub-id-type="doi">10.3115/1075218.1075261</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mihăilă</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>BioCause: annotating and analysing causality in the biomedical domain</article-title>
          <source>BMC Bioinformatics</source>
          <year>2013</year>
          <month>01</month>
          <day>16</day>
          <volume>14</volume>
          <fpage>2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-14-2</pub-id>
          <pub-id pub-id-type="medline">23323613</pub-id>
          <pub-id pub-id-type="pii">1471-2105-14-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC3621543</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bakal</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Talari</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kakani</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Kavuluru</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Exploiting semantic patterns over biomedical knowledge graphs for predicting treatment and causative relations</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>06</month>
          <volume>82</volume>
          <fpage>189</fpage>
          <lpage>199</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30086-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.05.003</pub-id>
          <pub-id pub-id-type="medline">29763706</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30086-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6070294</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cantor</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Triola</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Using trigger phrases to detect adverse drug reactions in ambulatory care notes</article-title>
          <source>Qual Saf Health Care</source>
          <year>2007</year>
          <month>04</month>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>132</fpage>
          <lpage>134</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17403760"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/qshc.2006.020073</pub-id>
          <pub-id pub-id-type="medline">17403760</pub-id>
          <pub-id pub-id-type="pii">16/2/132</pub-id>
          <pub-id pub-id-type="pmcid">PMC2653150</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
