<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i12e41529</article-id>
      <article-id pub-id-type="pmid">36446133</article-id>
      <article-id pub-id-type="doi">10.2196/41529</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Natural Language Processing for Improved Characterization of COVID-19 Symptoms: Observational Study of 350,000 Patients in a Large Integrated Health Care System</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Sanchez</surname>
            <given-names>Travis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Koleck</surname>
            <given-names>Theresa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chrimes</surname>
            <given-names>Dillon</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Li</surname>
            <given-names>Yikuan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Malden</surname>
            <given-names>Deborah E</given-names>
          </name>
          <degrees>MSc, DPhil</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Research &#38; Evaluation</institution>
            <institution>Kaiser Permanente Southern California</institution>
            <addr-line>100 S. Los Robles, 2nd Floor</addr-line>
            <addr-line>Pasadena, CA, 91101</addr-line>
            <country>United States</country>
            <phone>1 310 456 4324</phone>
            <email>debbie.e.malden@kp.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0567-8294</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Tartof</surname>
            <given-names>Sara Y</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2336-8476</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ackerson</surname>
            <given-names>Bradley K</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0816-7345</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hong</surname>
            <given-names>Vennis</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6060-8419</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Skarbinski</surname>
            <given-names>Jacek</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1630-5733</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Yau</surname>
            <given-names>Vincent</given-names>
          </name>
          <degrees>MA, PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5301-5300</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Qian</surname>
            <given-names>Lei</given-names>
          </name>
          <degrees>MS, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8001-3992</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Fischer</surname>
            <given-names>Heidi</given-names>
          </name>
          <degrees>MS, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5343-0002</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Shaw</surname>
            <given-names>Sally F</given-names>
          </name>
          <degrees>MPH, DrPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6553-1772</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Caparosa</surname>
            <given-names>Susan</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9625-7705</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Xie</surname>
            <given-names>Fagen</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1565-0490</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Epidemic Intelligence Service</institution>
        <institution>Centers for Disease Control and Prevention</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Research &#38; Evaluation</institution>
        <institution>Kaiser Permanente Southern California</institution>
        <addr-line>Pasadena, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Kaiser Permanente Bernard J. Tyson School of Medicine</institution>
        <addr-line>Pasadena, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Southern California Permanente Medical Group</institution>
        <addr-line>Harbor City, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>The Permanente Medical Group</institution>
        <institution>Kaiser Permanente Northern California</institution>
        <addr-line>Oakland, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Division of Research</institution>
        <institution>Kaiser Permanente Northern California</institution>
        <addr-line>Oakland, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Genentech, a Member of the Roche Group</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Deborah E Malden <email>debbie.e.malden@kp.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>12</month>
        <year>2022</year>
      </pub-date>
      <volume>8</volume>
      <issue>12</issue>
      <elocation-id>e41529</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>18</day>
          <month>10</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>11</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>29</day>
          <month>11</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Deborah E Malden, Sara Y Tartof, Bradley K Ackerson, Vennis Hong, Jacek Skarbinski, Vincent Yau, Lei Qian, Heidi Fischer, Sally F Shaw, Susan Caparosa, Fagen Xie. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 30.12.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2022/12/e41529" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Natural language processing (NLP) of unstructured text from electronic medical records (EMR) can improve the characterization of COVID-19 signs and symptoms, but large-scale studies demonstrating the real-world application and validation of NLP for this purpose are limited.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this paper is to assess the contribution of NLP when identifying COVID-19 signs and symptoms from EMR.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study was conducted in Kaiser Permanente Southern California, a large integrated health care system, using data from all patients with positive SARS-CoV-2 laboratory tests from March 2020 to May 2021. An NLP algorithm was developed to extract free text from EMR on 12 established signs and symptoms of COVID-19, including fever, cough, headache, fatigue, dyspnea, chills, sore throat, myalgia, anosmia, diarrhea, vomiting or nausea, and abdominal pain. The proportion of patients reporting each symptom and the corresponding onset dates were described before and after supplementing structured EMR data with NLP-extracted signs and symptoms. A random sample of 100 chart-reviewed and adjudicated SARS-CoV-2–positive cases was used to validate the algorithm performance.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 359,938 patients (mean age 40.4 [SD 19.2] years; 191,630/359,938, 53% female) with confirmed SARS-CoV-2 infection were identified over the study period. The most common signs and symptoms identified through NLP-supplemented analyses were cough (220,631/359,938, 61%), fever (185,618/359,938, 52%), myalgia (153,042/359,938, 43%), and headache (144,705/359,938, 40%). The NLP algorithm identified an additional 55,568 (15%) symptomatic cases that were previously defined as asymptomatic using structured data alone. The proportion of additional cases with each selected symptom identified in NLP-supplemented analysis varied across the selected symptoms, from 29% (63,742/220,631) of all records for cough to 64% (38,884/60,865) of all records with nausea or vomiting. Of the 295,305 symptomatic patients, the median time from symptom onset to testing was 3 days using structured data alone, whereas the NLP algorithm identified signs or symptoms approximately 1 day earlier. When validated against chart-reviewed cases, the NLP algorithm successfully identified signs and symptoms with consistently high sensitivity (ranging from 87% to 100%) and specificity (94% to 100%).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These findings demonstrate that NLP can identify and characterize a broad set of COVID-19 signs and symptoms from unstructured EMR data with enhanced detail and timeliness compared with structured data alone.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>COVID-19</kwd>
        <kwd>symptoms</kwd>
        <kwd>disease characterization</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>application</kwd>
        <kwd>data</kwd>
        <kwd>cough</kwd>
        <kwd>fever</kwd>
        <kwd>headache</kwd>
        <kwd>surveillance</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>COVID-19, the infection caused by the novel coronavirus, SARS-CoV-2 [<xref ref-type="bibr" rid="ref1">1</xref>], has accounted for more than 623 million cases and more than 6.5 million deaths globally as of October 2022 [<xref ref-type="bibr" rid="ref2">2</xref>]. SARS-CoV-2 primarily affects the respiratory system but can also affect the cardiovascular, gastrointestinal, neurologic, and other systems [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. The most common signs and symptoms include fever, cough, shortness of breath, fatigue, muscle aches, headaches, loss of taste or smell, sore throat, congestion, nausea or vomiting, and diarrhea [<xref ref-type="bibr" rid="ref7">7</xref>]. However, prevalence estimates for each sign or symptom have been inconsistent, with most being derived from studies relying on self-reported surveys that are more subjective than electronic medical records (EMR) [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Of the studies using EMR for disease characterization, most are restricted to subgroups of patients (ie, hospitalized patients) who may have distinct symptom profiles [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. An improved understanding of signs and symptoms of COVID-19 can inform patient care and improve population screening and disease surveillance.</p>
      <p>Signs and symptoms can be documented in EMR by health care providers in four primary forms, broadly defined as “structured” and “unstructured,” which are as follows: (1) structured COVID-19 lab test order–related questionnaires; (2) structured diagnosis codes; (3) structured clinical notes (which may include self-reported information); and (4) unstructured free-text clinical notes. However, of the few large-scale studies using EMR, most are limited to structured data alone, particularly International Classification of Diseases (ICD) diagnoses, which have demonstrated low concordance with self-reported information due to incomplete documentation during physician visits [<xref ref-type="bibr" rid="ref12">12</xref>]. Natural language processing (NLP) is a subfield of artificial intelligence devoted to the understanding and generation of language and can be used to supplement structured data fields with data extracted from unstructured health care provider notes across different EMR data sources [<xref ref-type="bibr" rid="ref13">13</xref>]. In short, NLP algorithms can be designed to convert information residing in natural language into structured formats for medical research, public health surveillance, and clinical decision support [<xref ref-type="bibr" rid="ref14">14</xref>]. During the COVID-19 pandemic, NLP has mostly been used to extract key information on COVID-19 from scientific publications [<xref ref-type="bibr" rid="ref15">15</xref>], media articles [<xref ref-type="bibr" rid="ref16">16</xref>], or social media platforms [<xref ref-type="bibr" rid="ref17">17</xref>]. However, despite containing rich information on signs and symptoms of COVID-19, few NLP-based tools have been developed for COVID-19 information extraction from unstructured EMR data. The highest-quality study thus far used an NLP-based tool termed “COVID-19 SignSym” to extract signs or symptoms from a small subset of clinical notes and performed a small validation study using data collected from 3 institutions in the United States [<xref ref-type="bibr" rid="ref18">18</xref>]. However, the real-world application and overall usefulness of NLP for this purpose have not been assessed at scale in a large population.</p>
      <p>Large integrated health care systems with access to complete EMR data provide a unique resource to investigate the value of NLP algorithms in the extraction of additional information from unstructured text fields. This paper describes the distribution and time of the onset of COVID-19 signs and symptoms before and after supplementing structured EMR with an NLP algorithm among more than 350,000 members of a large integrated health care system. In addition, we performed a validation substudy to assess the accuracy of the NLP algorithm in identifying COVID-19 signs and symptoms.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Setting</title>
        <p>Kaiser Permanente Southern California (KPSC) is one of the largest integrated health care systems in the United States providing medical services to over 4.7 million members. KPSC’s comprehensive EMR data contains individual-level structured data (including diagnosis codes, procedure codes, self-assessment health forms, medications, immunization records, and laboratory results) and unstructured data (including free-text clinical notes, radiology reports, and pathology reports) covering all medical visits. Therefore, the EMR represents a standardized data collection method across all health care settings (ie, all outpatient services, hospitals, emergency department, and virtual care encounters). Care delivered to members outside of the KPSC system is also captured, as outside providers must submit detailed claims to KPSC for reimbursement. KPSC has a diverse member population that is largely representative of all residents in Southern California with health insurance [<xref ref-type="bibr" rid="ref19">19</xref>]. As of December 2018, persons of Hispanic or Latino race or ethnicity make up the largest proportion of KPSC members (43%), followed by Non-Hispanic White (35%), Non-Hispanic Asian or Pacific Islander (12%), Non-Hispanic Black or African American (9%), and Other (1%).</p>
      </sec>
      <sec>
        <title>Study Population</title>
        <p>This is a retrospective cohort study of KPSC patients of all ages with positive SARS-CoV-2 laboratory tests from March 2020 to May 2021. SARS-CoV-2 tests of all types (ie, PCR and antigen tests) across all care settings were included. Participants were included in the analysis if they had at least 6 months of continuous KPSC membership (allowing for a 45-day administrative enrollment gap between memberships) prior to the date of their first positive COVID-19 test.</p>
      </sec>
      <sec>
        <title>Signs or Symptoms of COVID-19</title>
        <p>All EMR records were searched for 12 prespecified signs and symptoms within 30 days prior to and following the positive COVID-19 lab test order date. Signs and symptoms included fever, cough, headache, fatigue, dyspnea, chills, sore throat, myalgia, anosmia, diarrhea, vomiting or nausea, and abdominal pain, consistent with the Centers for Disease Control and Prevention (CDC) definitions [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. If none of the above signs or symptoms were detected in the EMR, the patient was categorized as asymptomatic. Signs or symptoms were identified from the following three primary sources in the EMR: (1) ICD-10 diagnosis codes; (2) keywords or phrases in medical charts; or (3) COVID-19 lab order–related questionnaires. Keywords for signs and symptoms were predetermined in consultation with trained clinicians. The complete list of ICD-10 diagnosis codes and keywords or phrases used to identify signs and symptoms can be found in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>NLP Algorithm Development</title>
        <p>An NLP algorithm was developed to identify signs and symptoms of COVID-19 and to determine their corresponding onset dates from the EMR. The algorithm development process was implemented using a rule-based approach via Python 3.6 (Python Software Foundation). This was an iterative process in which the developed algorithm was refined to align with the reference standards derived through medical chart review and adjudication. The stages of NLP algorithm development are described below and summarized in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flow diagram describing the natural language processing algorithm for detecting signs and symptoms of COVID-19. EMR: electronic medical records.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i12e41529_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Step 1: Data Preprocessing</title>
          <p>Clinical notes and structured data (diagnosis codes and symptom-related questionnaires) within 30 days prior to or following the order date of the positive SARS-CoV-2 lab test were extracted from the KPSC EMR system. The extracted clinical notes were preprocessed through letter lowercase conversion, misspelled word correction, abbreviated word standardization, sentence separation, and tokenization (ie, segmenting text into linguistic units such as words and punctuation) [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        </sec>
        <sec>
          <title>Step 2: Identification of Signs and Symptoms</title>
          <p>Patients were categorized as “Yes” for a particular symptom of interest under a set of prespecified situations (eg, if EMR notes contained a keyword or phrase related to a sign or symptom of interest, or if the patient answered “Yes” to a KPSC-administered medical questionnaire regarding COVID-19 symptoms). Keywords and phrases related to the 12 symptoms of interest were compiled by searching additional diagnosis terms and ontologies in the Unified Medical Language System [<xref ref-type="bibr" rid="ref21">21</xref>] and were enriched by experienced clinicians and the training data set. Potential variants, abbreviations, and misspellings were also identified during algorithm development and manual chart review. For example, “shortness of breath” can be abbreviated as “sob” and “nausea/vomiting” as “n/v.” Further misspellings and abbreviations are included in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. A regular expression was constructed to search and exclude sentences that contained a combination of preselected terms (eg, when notes refer to a <italic>lack</italic> of signs or symptoms or a <italic>historical</italic> medical event or indicate that signs or symptoms were experienced by someone else). A complete list of predefined sentence exclusion scenarios as well as “Yes” criteria for all signs and symptoms are provided in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
        <sec>
          <title>Step 3: Date of Symptom Onset Determination</title>
          <p>For each instance of identified signs or symptoms, the corresponding onset date was determined as either the clinical note date or by extracting the date from clinical notes under prespecified conditions, for example, where a date was detected with the symptom or followed with a phrase of “symptom (first) started,” “Date of symptoms (onset):,” “symptom onset date:,” and “onset:” in unstructured notes. Specific examples of prespecified conditions are included in Table S2 and Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. If signs or symptoms were identified from multiple clinical notes or structured data elements, the earliest date of symptom on record was assigned as the date of onset.</p>
        </sec>
      </sec>
      <sec>
        <title>NLP Algorithm Validation</title>
        <p>A sample of 100 randomly selected patients was used to assess the accuracy of the NLP algorithm in identifying each of the 12 signs or symptoms from unstructured EMR data, excluding patients used for the original algorithm development. Information on the presence or absence as well as the onset date of signs or symptoms was abstracted from EMR by trained chart abstractors using an abstraction manual. Patients for whom the sign or symptom complaint or onset date could not be clearly determined by the abstractors were further reviewed and adjudicated by a collaborating research physician. For this validation substudy, the manual chart review plus adjudicated results were deemed as the reference standard. The proportions of true positive, false positive, true negative, and false negative patients were used to estimate the sensitivity, specificity, positive predictive value (PPV), negative predictive value, and overall <italic>F</italic> score for each preselected sign or symptom of interest [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        <p>Sensitivity was defined as the proportion of patients correctly classified by the computerized NLP algorithm as experiencing the symptom of interest among patients identified with the sign or symptom by manual chart review. Specificity was the proportion of patients correctly classified as not experiencing the sign or symptom among individuals identified as not experiencing the sign or symptom according to chart review. PPV was the proportion of patients correctly classified as experiencing the sign or symptom of interest among those who were classified as experiencing the sign or symptom based on the NLP algorithm. Negative predictive value was the proportion of patients correctly classified as not experiencing the sign or symptom of interest among patients classified as not experiencing the sign or symptom based on the NLP algorithm. The <italic>F</italic> score for each comparison was calculated as (2 × PPV × sensitivity) / (PPV + sensitivity).</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>We described patient characteristics and COVID-19 symptoms by mean, SD, median, and quartiles for continuous variables, and by frequency and percentage for categorical variables. Proportions of each symptom reported using structured EMR data were compared against proportions of each symptom identified through NLP-supplemented methods. Signs and symptoms were grouped into the following four categories according to the affected body system: respiratory (cough, sore throat, and dyspnea), systemic (fever, fatigue, chills, and myalgia), gastrointestinal (diarrhea, nausea or vomiting, and abdominal pain), and neurologic (headache and anosmia). We assessed the association between characteristics of interest and inconsistencies between traditional EMR analysis using structured data and NLP-supplemented analysis. All analyses were performed using Python version 3.6 and SAS statistical software version 9.4 (SAS Institute).</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study was reviewed by the CDC and was conducted consistent with applicable federal law and CDC policy—45 C.F.R. part 46.102(l)(2), 21 C.F.R. part 56; 42 U.S.C. Sect. 241(d); 5 U.S.C. Sect. 552a; 44 U.S.C. Sect. 3501 et seq. The study protocol was reviewed and approved by the KPSC Institutional Review Board (#12395) with a waiver of requirement for informed consent. Only authorized persons were provided access to individual-level patient data.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Study Population</title>
        <p>The study cohort included 359,938 patients with a positive SARS-CoV-2 laboratory test during March 2020-May 2021. Most patients were Hispanic (219,751/359,938, 61.1%), the mean age was 40.4 (SD 19.2) years, and approximately half (191,630/359,938, 53.2%) were female participants (<xref ref-type="table" rid="table1">Table 1</xref>). The most common comorbidities were hyperlipidemia (49,743/359,938, 13.8%), hypertension (48,637/359,938, 13.5%), and diabetes (41,591/359,938, 11.6%). The majority (252,869/359,938, 70.3%) of patients lived in census tracts with a median household income of less than US $80,000. Overall, 11.5% (41,307/359,938) of patients were enrolled in Medicaid.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Baseline characteristics of the study population.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="670"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>Values (N=359,938)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>191,630 (53.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>168,308 (46.8)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Race or ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic White</td>
                <td>72,705 (20.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td>219,751 (61.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Black</td>
                <td>21,541 (6.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Asian</td>
                <td>21,723 (6.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Pacific Islander</td>
                <td>2362 (0.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Native American or Alaskan</td>
                <td>639 (0.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other or unknown</td>
                <td>21,217 (5.9)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Age (years) at time of SARS-CoV-2 test, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0-17</td>
                <td>44,915 (12.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-64</td>
                <td>274,932 (76.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥65</td>
                <td>40,091 (11.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>40.4 (19.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Age (years), median (IQR)</td>
                <td>40.0 (26.0, 55.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>BMI, kg/m<sup>2</sup>, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;18.5</td>
                <td>20,778 (5.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18.5-24.9</td>
                <td>72,642 (20.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>25.0-29.9</td>
                <td>102,078 (28.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>30.0-34.9</td>
                <td>79,394 (22.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>35.0-39.9</td>
                <td>40,617 (11.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>40.0-44.9</td>
                <td>17,746 (4.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥45.0</td>
                <td>11,828 (3.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td>14,855 (4.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Tobacco use status, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Current</td>
                <td>9701 (2.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Former</td>
                <td>50,013 (13.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Never</td>
                <td>226,518 (62.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unknown</td>
                <td>73,706 (20.5)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Comorbidities, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hyperlipidemia</td>
                <td>49,743 (13.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hypertension</td>
                <td>48,637 (13.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diabetes</td>
                <td>41,591 (11.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Chronic pulmonary disease</td>
                <td>21,254 (5.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Renal disease</td>
                <td>10,298 (2.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cancer</td>
                <td>5401 (1.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Stroke</td>
                <td>2937 (0.8)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Median annual household income</bold>
                  <sup>a</sup>
                  <bold>(US $), n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;40,000</td>
                <td>41,352 (11.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>40,000-79,999</td>
                <td>211,517 (58.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥80,000</td>
                <td>106,886 (29.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td>183 (0.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Insurance, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Medicaid</td>
                <td>41,307 (11.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Medicare</td>
                <td>36,013 (10.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Calendar period of SARS-CoV-2 test, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>March-May 2020</td>
                <td>9138 (2.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>June-August 2020</td>
                <td>51,406 (14.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>September-November 2020</td>
                <td>54,936 (15.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>December 2020-February 2021</td>
                <td>233,707 (64.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>March-May 2021</td>
                <td>10,751 (3.0)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Measured at the census tract level.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>COVID-19 Signs and Symptoms</title>
        <p>Supplementing structured EMR data with unstructured EMR data identified 55,568 additional symptomatic infections that were previously defined as asymptomatic based on structured data alone, representing 15.4% (55,568/359,938) of all infections. This proportion of additional identified symptomatic infections did not vary substantially by sex, age group, or race and ethnicity (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). However, there was an apparent decrease in the relative proportion of symptomatic infections identified with unstructured data during June-August 2020, whereby a higher proportion of all symptomatic cases (47,630/51,406, 92.7%) were identified via structured data compared to other time periods (60% [6456/10,751] to 80% [7336/9138]). In NLP-supplemented analyses, the symptoms ranged in frequency of reporting, from 8.0% (28,713/359,938) for abdominal pain to 61.3% (220,631/359,938) for cough. After cough, the most common symptoms identified in EMRs using NLP-supplemented analyses were fever (185,618/359,938, 51.6%), myalgia (154,042/359,938, 42.8%), headache (144,705/359,938, 40.2%), and fatigue (132,834/359,938, 36.9%; <xref rid="figure2" ref-type="fig">Figure 2</xref>A). NLP-supplemented analyses identified persons reporting each symptom that otherwise would not have been identified using structured data alone. For example, the proportion of SARS-CoV-2–positive persons reporting nausea and vomiting more than doubled, from 6.1% (21,981/359,938) in analysis restricted to structured data to 16.9% (60,865/359,938) in analyses supplementing this with NLP-derived fields from unstructured data.</p>
        <p>NLP-supplemented analyses consistently identified additional signs and symptoms across all body systems relative to structured data alone, increasing the proportion of all SARS-CoV-2–positive patients identified with respiratory symptoms from 52.6% (189,146/359,938) to 69.4% (249,987/359,938), systemic symptoms from 44.4% (159,934/359,938) to 68.9% (247,988/359,938), neurological symptoms from 29.5% (106,243/359,938) to 52.1% (187,649/359,938), and gastrointestinal symptoms from 14.8% (53,193/359,938) to 31.4% (113,006/359,938; <xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <p>Among all 359,938 patients with positive SARS-CoV-2 results, 64,633 (18%) were not identified as symptomatic at any point over the study period based on the 12 preselected symptoms used in NLP-supplemented analyses (<xref ref-type="table" rid="table2">Table 2</xref>). Among all patients identified as reporting at least one symptom, the majority (252,466/295,305, 85.5%) were tested for SARS-CoV-2 following symptom onset, and 16,491 (4.6%) were tested on the same day as symptoms were reported (<xref ref-type="table" rid="table2">Table 2</xref>). Of the remaining 26,348 persons who reported symptoms after the SARS-CoV-2 test date, most (17,956/26,348, 68.1%) reported symptoms within the first 1-7 days following the SARS-CoV-2 test. Compared with structured data alone, NLP-supplemented analyses approximately doubled the proportion of identified symptomatic cases in the 6 to 30 days prior to SARS-CoV-2 sample collection (<xref rid="figure2" ref-type="fig">Figure 2</xref>B). The median time between the onset of first symptom and obtaining a test for SARS-CoV-2 was 3 days (IQR 1-6) for analysis restricted to traditional structured EMR data, and 4 days (IQR 2-9) for analysis supplemented with NLP algorithms.</p>
        <p>NLP-supplemented analyses also increased the number of signs or symptoms identified per individual, often across multiple body systems. The proportion of patients reporting 4 or more symptoms more than doubled in NLP-supplemented analysis compared to structured data alone, from 25.1% (90,202/359,938) to 53.1% (190,961/359,938) of all cases (<xref ref-type="table" rid="table2">Table 2</xref>). Similarly, the proportion of patients reporting symptoms related to 3 or more body systems increased from 22.6% (81,229/359,938) to 49.3% (177,440/359,938) after applying the NLP algorithm.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>A comparison between structured and unstructured data. (A) Proportion of patients with SARS-CoV-2 with identified selected symptoms reported through structured and unstructured electronic medical records (EMR) data, by sign or symptom. (B) Days between testing and reported symptom onset before and after supplementing structured data with unstructured data (this includes ICD-10 codes, COVID-19 test-related questionnaires, and symptoms collected via keywords or phrases). ICD: International Classification of Diseases.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i12e41529_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>COVID-19 characterization within 30 days prior to and after SARS-CoV-2 test date among all patients with confirmed SARS-CoV-2 infection (N=359,938), by data type.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="480"/>
            <col width="230"/>
            <col width="260"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>Structured data</td>
                <td>Structured and unstructured data</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Days between testing and symptom onset<sup>a</sup>, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Testing 15-30 days after symptom onset</td>
                <td>19,376 (5.4)</td>
                <td>42,696 (11.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>10-14 days after symptom onset</td>
                <td>12,751 (3.5)</td>
                <td>28,317 (7.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>7-9 days after symptom onset</td>
                <td>19,896 (5.5)</td>
                <td>37,325 (10.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4-6 days after symptom onset</td>
                <td>42,368 (11.8)</td>
                <td>57,569 (16.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1-3 days after symptom onset</td>
                <td>94,157 (26.2)</td>
                <td>86,559 (24.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Tested on same day as symptom onset</td>
                <td>34,146 (9.5)</td>
                <td>16,491 (4.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1-7 days before symptom onset</td>
                <td>7949 (2.2)</td>
                <td>17,956 (5.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>8-14 days before symptom onset</td>
                <td>5053 (1.4)</td>
                <td>5147 (1.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>15-30 days before symptom onset</td>
                <td>4041 (1.1)</td>
                <td>3245 (0.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No symptoms reported</td>
                <td>120,201 (33.4)</td>
                <td>64,633 (18.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Days between testing and symptom onset<sup>a</sup>, mean (SD)</td>
                <td>–3.96 (7.46)</td>
                <td>–6.31 (8.49)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Days between testing and symptom onset<sup>a</sup>, median (IQR)</td>
                <td>–3.00 (–6.00, –1.00)</td>
                <td>–4.00 (–9.00, –2.00)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Number of symptoms reported<sup>a</sup>, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td>120,201 (33.4)</td>
                <td>64,633 (18.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1-3</td>
                <td>149,535 (41.5)</td>
                <td>104,344 (29.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4-6</td>
                <td>72,929 (20.3)</td>
                <td>111,132 (30.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>7-9</td>
                <td>16,164 (4.5)</td>
                <td>65,037 (18.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>10-12</td>
                <td>1109 (0.3)</td>
                <td>14,792 (4.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Body system involved<sup>a,b</sup>, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Respiratory</td>
                <td>189,146 (52.6)</td>
                <td>249,987 (69.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gastrointestinal</td>
                <td>53,193 (14.8)</td>
                <td>113,006 (31.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Systemic</td>
                <td>159,934 (44.4)</td>
                <td>247,988 (68.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neurologic</td>
                <td>106,243 (29.5)</td>
                <td>187,649 (52.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Number of body systems involved<sup>a</sup>, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No symptoms reported</td>
                <td>120,201 (33.4)</td>
                <td>64,633 (18.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>70,399 (19.6)</td>
                <td>41,452 (11.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>2</td>
                <td>88,109 (24.5)</td>
                <td>76,413 (21.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3</td>
                <td>63,017 (17.5)</td>
                <td>105,408 (29.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>4</td>
                <td>18,212 (5.1)</td>
                <td>72,032 (20.0)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Within 30 days prior to and after SARS-CoV-2 test date.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Reported the percentage among the study cohort for each body system.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP Algorithm Validation</title>
        <p>Compared to signs or symptoms identified using structured data only, NLP-supplemented analyses consistently returned a high proportion of true positive cases across the signs and symptoms studied, with PPV values of &#62;95% for all symptoms except abdominal pain (75%). Sensitivity ranged from 87% for nausea or vomiting to 100% for cough, fever, anosmia, and abdominal pain (<xref ref-type="table" rid="table3">Table 3</xref>). Specificity ranged from 94.1% for chills to 100% (7 symptoms). <italic>F</italic> scores ranged from 0.86 to 1.00, with the majority being over 0.90. Regarding validation of onset time, 87% of onset dates identified by NLP were within ±3 days of those found by chart review; 70% were the same date (Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance measurements of natural language processing (NLP) algorithm to identify COVID-19 signs or symptoms, as compared with chart-confirmed validation data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="110"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="110"/>
            <col width="110"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <thead>
              <tr valign="bottom">
                <td>Sign or symptom</td>
                <td>Chart review, n/N</td>
                <td>TP<sup>a</sup> by NLP</td>
                <td>TN<sup>b</sup> by NLP</td>
                <td>FN<sup>c</sup> by NLP</td>
                <td>FP<sup>d</sup> by NLP</td>
                <td>Sensitivity<sup>e</sup> (%)</td>
                <td>Specificity<sup>f</sup> (%)</td>
                <td>PPV<sup>g</sup> (%)</td>
                <td>NPV<sup>h</sup> (%)</td>
                <td><italic>F</italic> score<sup>i</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Cough</td>
                <td>76/100</td>
                <td>76</td>
                <td>23</td>
                <td>0</td>
                <td>1</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>98.7</td>
                <td>95.8</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>Fever</td>
                <td>73/100</td>
                <td>73</td>
                <td>23</td>
                <td>0</td>
                <td>4</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>94.8</td>
                <td>85.2</td>
                <td>0.97</td>
              </tr>
              <tr valign="top">
                <td>Body ache</td>
                <td>67/100</td>
                <td>64</td>
                <td>33</td>
                <td>3</td>
                <td>0</td>
                <td>95.5</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>91.7</td>
                <td>0.98</td>
              </tr>
              <tr valign="top">
                <td>Headache</td>
                <td>54/100</td>
                <td>50</td>
                <td>46</td>
                <td>4</td>
                <td>0</td>
                <td>92.6</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>92.0</td>
                <td>0.96</td>
              </tr>
              <tr valign="top">
                <td>Fatigue</td>
                <td>48/100</td>
                <td>44</td>
                <td>50</td>
                <td>4</td>
                <td>2</td>
                <td>91.7</td>
                <td>96.2</td>
                <td>95.7</td>
                <td>92.6</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>Dyspnea</td>
                <td>40/100</td>
                <td>38</td>
                <td>60</td>
                <td>2</td>
                <td>0</td>
                <td>95.0</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>96.8</td>
                <td>0.97</td>
              </tr>
              <tr valign="top">
                <td>Sore throat</td>
                <td>49/100</td>
                <td>46</td>
                <td>51</td>
                <td>3</td>
                <td>0</td>
                <td>93.9</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>94.4</td>
                <td>0.97</td>
              </tr>
              <tr valign="top">
                <td>Anosmia</td>
                <td>35/100</td>
                <td>35</td>
                <td>65</td>
                <td>0</td>
                <td>0</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>1.00</td>
              </tr>
              <tr valign="top">
                <td>Chills</td>
                <td>36/100</td>
                <td>32</td>
                <td>64</td>
                <td>4</td>
                <td>0</td>
                <td>88.9</td>
                <td>94.1</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>Diarrhea</td>
                <td>29/100</td>
                <td>28</td>
                <td>70</td>
                <td>1</td>
                <td>1</td>
                <td>96.6</td>
                <td>98.6</td>
                <td>96.6</td>
                <td>98.6</td>
                <td>0.97</td>
              </tr>
              <tr valign="top">
                <td>Nausea or vomiting</td>
                <td>23/100</td>
                <td>20</td>
                <td>76</td>
                <td>3</td>
                <td>1</td>
                <td>87.0</td>
                <td>98.7</td>
                <td>95.2</td>
                <td>96.2</td>
                <td>0.91</td>
              </tr>
              <tr valign="top">
                <td>Abdominal pain</td>
                <td>9/100</td>
                <td>9</td>
                <td>88</td>
                <td>0</td>
                <td>3</td>
                <td>100.0</td>
                <td>96.7</td>
                <td>75.0</td>
                <td>100.0</td>
                <td>0.86</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>TP: true positive.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>TN: true negative.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>FN: false negative.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>FP: false positive.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>The proportion of symptoms correctly classified by the computerized algorithm (TP) among all cases (TP+FN) ascertained by chart review.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>The proportion of cases correctly classified as absence of symptoms by the computerized algorithm (TN) among all individuals without symptom (TN+FP) according to chart review.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>PPV: positive predictive value—the proportion of symptom cases correctly classified (TP) among all those classified by the computerized algorithm (TP+FP).</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>NPV: negative predictive value—the proportion of cases correctly classified as nonsymptom (TN) among all nonsymptom cases classified by the computerized algorithm (TN+FN).</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup>The overall accuracy of the NLP algorithm in identifying each sign or symptom calculated as (2×PPV×sensitivity)/(PPV+sensitivity).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overview</title>
        <p>Among more than 350,000 patients, this paper demonstrates that NLP algorithms can be used to extract unstructured data from EMR on COVID-19 signs and symptoms with enhanced detail and timeliness compared with structured data alone. To the authors’ knowledge, this analysis represents the largest population study to date using NLP-based methods for identification and characterization of COVID-19 signs and symptoms.</p>
      </sec>
      <sec>
        <title>Principal Findings</title>
        <p>Overall, we observed that up to 60% of information on signs and symptoms may only be documented in the clinical narrative; however, this proportion varied widely between the conditions studied. Hence, previous real-world population studies that were limited to classical epidemiological methods (ie, using structured EMR data alone) may have underestimated the complexity and diversity of COVID-19 symptoms. This finding has important implications for patient care by improving our understanding of the whole spectrum and pathophysiology of COVID-19. This appeared particularly relevant for respiratory and gastrointestinal symptoms, whereby our data indicate that a significant proportion of symptomatic patients (24% and 53%, respectively) are overlooked when data are limited to structured components alone.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Prior studies have noted similar improvements in COVID-19 case detection when clinical notes, ICD-10 diagnosis codes, and temperature fields have been used together, particularly for gastrointestinal conditions, rash or fever, and influenza-like illness syndromes, reporting almost double the sensitivity of detection [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. The highest-quality evidence describing COVID-19 signs and symptoms to date has been derived from large meta-analyses that combine data from different study populations. In a large-scale meta-analysis including EMR data from over 4.5 million patients diagnosed with COVID-19 across 23 real-world health care databases [<xref ref-type="bibr" rid="ref25">25</xref>], of the 6 signs or symptoms studied, cough, fever, and dyspnea were the most commonly identified. In general, this pattern was similar to the results presented in this paper; however, the proportions reported per symptom were significantly lower than those identified in this study with NLP-supplemented analyses. For example, whereas 32% was the highest proportion of patients identified with a cough in the large meta-analysis, this study identified a total of 61% with cough in NLP-supplemented analyses.</p>
        <p>Compared to a systematic review including EMR and self-reported symptom data pooled from 24,410 cases across 148 studies in 9 countries [<xref ref-type="bibr" rid="ref10">10</xref>], we identified similar estimates for some signs and symptoms in this paper using NLP-supplemented analyses, such as cough (61% in this study vs 57%, respectively), fatigue (37% vs 31%), and anosmia (28% vs 25%). However, we observed a higher proportion of cases reporting most other prespecified symptoms, including dyspnea (31% vs 23%), sore throat (32% vs 12%), diarrhea (21% vs 10%), nausea or vomiting (17% vs 10%), abdominal pain (8% vs 4%), and headache (40% vs 13%). Importantly, gastrointestinal symptoms are increasingly being recognized as part of the COVID-19 spectrum, yet prior meta-analyses underestimate their prevalence compared with our work. One meta-analysis of 47 studies estimated diarrhea and nausea or vomiting in 7.7% and 7.8% of patients with COVID-19 infection, respectively [<xref ref-type="bibr" rid="ref26">26</xref>], and another analysis of 78 studies estimated a weighted pooled prevalence of 12.4% (95% CI, 8.2% to 17.1%) for diarrhea, 9.0% (95% CI, 5.5% to 12.9%) for nausea or vomiting, and 6.2% (95% CI, 2.6% to 10.3%) for abdominal pain [<xref ref-type="bibr" rid="ref27">27</xref>]. In our study, approximately 21% (75,911/359,938) of patients with confirmed SARS-CoV-2 infection reported diarrhea, 17% (60,865/359,938) reported nausea or vomiting, and 8% (28,713/359,938) reported abdominal pain, all of which are higher estimates than have been reported in previous studies. Gastrointestinal involvement has been associated with delays in diagnosis compared with patients without digestive symptoms and hence may have been overlooked previously [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>The observed discrepancies between this paper and prior evidence may be the direct result of the contribution of NLP algorithms when identifying COVID-19 signs and symptoms from EMR in this study, whereas prior studies have relied on structured components of EMR alone, such as ICD-10 diagnosis codes [<xref ref-type="bibr" rid="ref25">25</xref>]. Among survey-based studies, results may be systematically biased due to responder bias or recall bias [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Importantly, studies contributing to large meta-analyses and systematic reviews are heterogeneous with respect to their populations and methodologies, with some restricted to symptomatic hospitalized patients [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Indeed, prior EMR- and survey-based studies restricted to hospitalized cases report higher frequencies of symptom complaints compared to this study [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. This paper includes structured and unstructured EMR data from all care settings among a single diverse patient population of all ages, substantially expanding the scope compared with prior work.</p>
        <p>Together, the findings presented here demonstrate the complexity of COVID-19, which often manifests as multiple diverse signs or symptoms across different body systems. With most prior large-scale real-world studies lacking unstructured EMR data, this observation may have been overlooked previously. As well as informing clinicians to guide patient care, understanding the complete array of signs or symptoms associated with COVID-19 could enhance population-level screening efforts. In addition, we found that NLP-supplemented analyses identified an earlier date of onset of potential COVID-19 signs and symptoms compared to traditional structured EMR data. Importantly, most of the transmission occurs within the first 5 days after symptom onset [<xref ref-type="bibr" rid="ref35">35</xref>]. Therefore, by possibly facilitating identification of an earlier date of onset relative to test positivity at the population level, NLP methods could enhance public health surveillance systems, potentially informing preventive strategies to reduce community transmission.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has at least 5 limitations, some of which are ubiquitous and unavoidable in observational research. First, while we capture symptoms occurring within 30 days of a COVID-19–positive test, it is possible that the reported symptoms detected in the EMR were due to other causes. However, chart review verified that the identified symptoms occurring within 20 days of testing were attributable to COVID-19 in the overwhelming majority of cases. Nevertheless, a comprehensive assessment of the overall usefulness of NLP would have involved a comparison with symptom reports in a SARS-CoV-2–negative population. Second, SARS-CoV-2 diagnostic tests were restricted to certain populations at differing points over the study period corresponding to periods of limited availability. As such, our estimates largely represented patients with symptomatic COVID-19 who sought medical care, and therefore it is likely that asymptomatic individuals were underrepresented in our analysis. Third, we defined symptomatic COVID-19 according to 12 conditions established as signs or symptoms of COVID-19 in the scientific literature; hence, it is possible that symptomatic cases reporting conditions outside of this established list are not counted as symptomatic. Fourth, the validation data set used in this paper included a relatively small sample size, which may have led to spurious findings. However, despite the small sample, the NLP algorithm performed well when identifying COVID-19 symptoms, producing similar sensitivity, <italic>F</italic> statistics, and PPV values to previously developed algorithms for symptom identification and COVID-19 characterization [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Lastly, this study was limited to insured individuals residing in Southern California from March 2020 to May 2021. 
Therefore, the findings may not be representative of or generalizable to other populations or to infections attributable to SARS-CoV-2 variants such as Delta or Omicron. However, the findings reported in this paper remain internally valid over the study period in demonstrating the overwhelming advantage of applying NLP to EMR for enhanced disease characterization across multiple clinical conditions.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This paper demonstrates that NLP can identify and characterize a broad set of COVID-19 signs and symptoms from medical records, with enhanced detail and timeliness, compared with prior EMR-based studies. These findings provide clear evidence that structured EMR data alone are incomplete for symptom capture, and NLP can enhance our understanding of the whole spectrum of disease pathophysiology. Further, as a scalable and timely method for disease characterization, NLP could strengthen COVID-19 surveillance beyond conventional surveillance systems.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supporting information.</p>
        <media xlink:href="publichealth_v8i12e41529_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 246 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical records</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">KPSC</term>
          <def>
            <p>Kaiser Permanente Southern California</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank the patients of Kaiser Permanente for helping to improve care using information collected across electronic health record systems. We would like to thank the Kaiser Permanente Information Technology team for their assistance throughout. We would like to thank Michael Sheppard and Amanda Smith for offering their expertise on NLP.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>COVID-19 cases were obtained from the electronic medical records of Kaiser Permanente Southern California (KPSC) with approval from the KPSC Institutional Review Board. The data sets analyzed during this study are not publicly available due to their confidential nature.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>All authors contributed to the conception and design of the study; SYT, FX, BKA, and JS contributed to the development of the natural language processing algorithm; VH, LQ, HF, FX, SYT, VY, and JS contributed to acquisition, analysis, and interpretation of data; DM, SYT, BKA, VH, JS, VY, LQ, HF, SFS, SC, and FX contributed to drafting the work; DM, SYT, BKA, VH, JS, VY, LQ, HF, SFS, SC, and FX reviewed and contributed to the development of the final draft.</p>
      </fn>
      <fn fn-type="conflict">
        <p>SYT received a grant from Roche/Genentech, Inc. to support this work. SYT, BKA, VH, JS, VY, LQ, HF, SFS, SC, and FX received support for research time with this funding. VY works for Roche-Genentech. The funder had no role in the design, conduct, or analysis of this study, or in manuscript development.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>DSC</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yuen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>N</given-names>
            </name>
            <collab>China Medical Treatment Expert Group for Covid-19</collab>
          </person-group>
          <article-title>Clinical Characteristics of Coronavirus Disease 2019 in China</article-title>
          <source>N Engl J Med</source>
          <year>2020</year>
          <month>04</month>
          <day>30</day>
          <volume>382</volume>
          <issue>18</issue>
          <fpage>1708</fpage>
          <lpage>1720</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32109013"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa2002032</pub-id>
          <pub-id pub-id-type="medline">32109013</pub-id>
          <pub-id pub-id-type="pmcid">PMC7092819</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>WHO coronavirus (COVID-19) dashboard</article-title>
          <source>World Health Organization</source>
          <access-date>2022-12-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://covid19.who.int/">https://covid19.who.int/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Iacucci</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Manifestations and prognosis of gastrointestinal and liver involvement in patients with COVID-19: a systematic review and meta-analysis</article-title>
          <source>The Lancet Gastroenterology &#38; Hepatology</source>
          <year>2020</year>
          <month>07</month>
          <volume>5</volume>
          <issue>7</issue>
          <fpage>667</fpage>
          <lpage>678</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32405603"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2468-1253(20)30126-6</pub-id>
          <pub-id pub-id-type="medline">32405603</pub-id>
          <pub-id pub-id-type="pii">S2468-1253(20)30126-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7217643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tenforde</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Billig Rose</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lindsell</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>NI</given-names>
            </name>
            <name name-style="western">
              <surname>Files</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Prekker</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Steingrub</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Smithline</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Aboodi</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Exline</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Henning</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Qadir</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Stubblefield</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Self</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Feldstein</surname>
              <given-names>LR</given-names>
            </name>
            <collab>CDC COVID-19 Response Team</collab>
          </person-group>
          <article-title>Characteristics of Adult Outpatients and Inpatients with COVID-19 - 11 Academic Medical Centers, United States, March-May 2020</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2020</year>
          <month>07</month>
          <day>03</day>
          <volume>69</volume>
          <issue>26</issue>
          <fpage>841</fpage>
          <lpage>846</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.15585/mmwr.mm6926e3"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6926e3</pub-id>
          <pub-id pub-id-type="medline">32614810</pub-id>
          <pub-id pub-id-type="pmcid">PMC7332092</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Varatharaj</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ellul</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>NWS</given-names>
            </name>
            <name name-style="western">
              <surname>Pollak</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Tenorio</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Sultan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Easton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Breen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zandi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coles</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Manji</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Shahi Salman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Menon</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Nicholson</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Benjamin</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Carson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Solomon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kneen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pett</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Galea</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Michael</surname>
              <given-names>BD</given-names>
            </name>
            <collab>CoroNerve Study Group</collab>
          </person-group>
          <article-title>Neurological and neuropsychiatric complications of COVID-19 in 153 patients: a UK-wide surveillance study</article-title>
          <source>Lancet Psychiatry</source>
          <year>2020</year>
          <month>10</month>
          <volume>7</volume>
          <issue>10</issue>
          <fpage>875</fpage>
          <lpage>882</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32593341"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2215-0366(20)30287-X</pub-id>
          <pub-id pub-id-type="medline">32593341</pub-id>
          <pub-id pub-id-type="pii">S2215-0366(20)30287-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC7316461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 and the cardiovascular system</article-title>
          <source>Nat Rev Cardiol</source>
          <year>2020</year>
          <month>05</month>
          <day>05</day>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>259</fpage>
          <lpage>260</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32139904"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41569-020-0360-5</pub-id>
          <pub-id pub-id-type="medline">32139904</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41569-020-0360-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7095524</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <article-title>Symptoms of COVID-19</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2022</year>
          <access-date>2022-12-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/coronavirus/2019-ncov/symptoms-testing/symptoms.html">https://www.cdc.gov/coronavirus/2019-ncov/symptoms-testing/symptoms.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>WE</given-names>
            </name>
            <name name-style="western">
              <surname>Altae-Tran</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Briggs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>McGee</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kamariza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nova</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pereta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Danford</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kamel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gothe</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Milam</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Aurambault</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Primke</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Inkenbrandt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huynh</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Croatto</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bentley</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Travassos</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coull</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Openshaw</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Greene</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Shalem</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Probasco</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Silbermann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Population-scale longitudinal mapping of COVID-19 symptoms, behaviour and testing</article-title>
          <source>Nat Hum Behav</source>
          <year>2020</year>
          <month>09</month>
          <volume>4</volume>
          <issue>9</issue>
          <fpage>972</fpage>
          <lpage>982</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32848231"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41562-020-00944-2</pub-id>
          <pub-id pub-id-type="medline">32848231</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-020-00944-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7501153</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Menni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Valdes</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Freidin</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Sudre</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Drew</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Ganesh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Varsavsky</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cardoso</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>El-Sayed Moustafa</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Visconti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hysi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>RCE</given-names>
            </name>
            <name name-style="western">
              <surname>Mangino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Falchi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ourselin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Steves</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Spector</surname>
              <given-names>TD</given-names>
            </name>
          </person-group>
          <article-title>Real-time tracking of self-reported symptoms to predict potential COVID-19</article-title>
          <source>Nat Med</source>
          <year>2020</year>
          <month>07</month>
          <volume>26</volume>
          <issue>7</issue>
          <fpage>1037</fpage>
          <lpage>1040</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32393804"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-020-0916-2</pub-id>
          <pub-id pub-id-type="medline">32393804</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-020-0916-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7751267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Geoghegan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Arbyn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammed</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>McGuinness</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Wade</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>The prevalence of symptoms in 24,410 adults infected by the novel coronavirus (SARS-CoV-2; COVID-19): A systematic review and meta-analysis of 148 studies from 9 countries</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e0234765</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0234765"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0234765</pub-id>
          <pub-id pub-id-type="medline">32574165</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-13070</pub-id>
          <pub-id pub-id-type="pmcid">PMC7310678</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Heider</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Madory</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alekseyenko</surname>
              <given-names>AV</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing enabling COVID-19 predictive analytics to support data-driven patient advising and pooled testing</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>12</month>
          <day>28</day>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>12</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34415311"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab186</pub-id>
          <pub-id pub-id-type="medline">34415311</pub-id>
          <pub-id pub-id-type="pii">6355588</pub-id>
          <pub-id pub-id-type="pmcid">PMC8714262</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Crabb</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Lyons</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bale</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>West</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Peacock</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>RU</given-names>
            </name>
          </person-group>
          <article-title>Comparison of International Classification of Diseases and Related Health Problems, Tenth Revision Codes With Electronic Medical Records Among Patients With Symptoms of Coronavirus Disease 2019</article-title>
          <source>JAMA Netw Open</source>
          <year>2020</year>
          <month>08</month>
          <day>03</day>
          <volume>3</volume>
          <issue>8</issue>
          <fpage>e2017703</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32797176"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.17703</pub-id>
          <pub-id pub-id-type="medline">32797176</pub-id>
          <pub-id pub-id-type="pii">2769428</pub-id>
          <pub-id pub-id-type="pmcid">PMC7428802</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NLTK: The Natural Language Toolkit</article-title>
          <year>2002</year>
          <conf-name>Proceedings of the ACL-02 Workshop on Effective Tools and Methodologies for Teaching Natural Language Processing and Computational Linguistics</conf-name>
          <conf-date>July 07, 2002</conf-date>
          <conf-loc>Philadelphia, Pennsylvania, USA</conf-loc>
          <fpage>63</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.3115/1118108.1118117</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verspoor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>May</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Munro</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Paris</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Introduction to the 1st Workshop on Natural Language Processing for COVID-19 at ACL 2020</article-title>
          <year>2020</year>
          <conf-name>Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020</conf-name>
          <conf-date>July 9-10, 2020</conf-date>
          <conf-loc>Online</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shams</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Hoque Apu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker Raihan</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Siddika</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Preo</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Hussein</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Mostari</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Web Search Engine Misinformation Notifier Extension (SEMiNExt): A Machine Learning Based Approach during COVID-19 Pandemic</article-title>
          <source>Healthcare (Basel)</source>
          <year>2021</year>
          <month>02</month>
          <day>03</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>156</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare9020156"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare9020156</pub-id>
          <pub-id pub-id-type="medline">33546110</pub-id>
          <pub-id pub-id-type="pii">healthcare9020156</pub-id>
          <pub-id pub-id-type="pmcid">PMC7913172</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matharaarachchi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Domaratzki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Muthukumarana</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Discovering Long COVID Symptom Patterns: Association Rule Mining and Sentiment Analysis in Social Media Tweets</article-title>
          <source>JMIR Form Res</source>
          <year>2022</year>
          <month>09</month>
          <day>07</day>
          <volume>6</volume>
          <issue>9</issue>
          <fpage>e37984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2022/9/e37984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/37984</pub-id>
          <pub-id pub-id-type="medline">36069846</pub-id>
          <pub-id pub-id-type="pii">v6i9e37984</pub-id>
          <pub-id pub-id-type="pmcid">PMC9494218</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abu-El-Rub</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Manion</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rouhizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 SignSym: a fast adaptation of a general clinical NLP tool to identify and normalize COVID-19 signs and symptoms to OMOP common data model</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>06</month>
          <day>12</day>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>1275</fpage>
          <lpage>1283</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33674830"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab015</pub-id>
          <pub-id pub-id-type="medline">33674830</pub-id>
          <pub-id pub-id-type="pii">6155732</pub-id>
          <pub-id pub-id-type="pmcid">PMC7989301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koebnick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Langer-Gould</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Gould</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Chao</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Iyer</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Sociodemographic characteristics of members of a large, integrated health care system: comparison with US Census Bureau data</article-title>
          <source>Perm J</source>
          <year>2012</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>37</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.thepermanentejournal.org/doi/10.7812/TPP/12-031?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7812/TPP/12-031</pub-id>
          <pub-id pub-id-type="medline">23012597</pub-id>
          <pub-id pub-id-type="pmcid">PMC3442759</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Killerby</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Newton</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ashworth</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Berns</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Brennan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bressler</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Bye</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Harduar Morano</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Markus</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Read</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Rissman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tate</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Midgley</surname>
              <given-names>CM</given-names>
            </name>
            <collab>Case Investigation Form Working Group</collab>
          </person-group>
          <article-title>Symptom Profiles of a Convenience Sample of Patients with COVID-19 — United States, January–April 2020</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2020</year>
          <month>07</month>
          <day>17</day>
          <volume>69</volume>
          <issue>28</issue>
          <fpage>904</fpage>
          <lpage>908</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.15585/mmwr.mm6928a2"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6928a2</pub-id>
          <pub-id pub-id-type="medline">32673296</pub-id>
          <pub-id pub-id-type="pmcid">PMC7366851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Humphreys</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Lindberg</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Schoolman</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Barnett</surname>
              <given-names>GO</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System: an informatics research collaboration</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>1998</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/9452981"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.1998.0050001</pub-id>
          <pub-id pub-id-type="medline">9452981</pub-id>
          <pub-id pub-id-type="pmcid">PMC61271</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goutte</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gaussier</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A Probabilistic Interpretation of Precision, Recall and F-Score, with Implication for Evaluation</article-title>
          <year>2005</year>
          <conf-name>ECIR 2005: Advances in Information Retrieval</conf-name>
          <conf-date>March 21-23, 2005</conf-date>
          <conf-loc>Santiago de Compostela, Spain</conf-loc>
          <fpage>345</fpage>
          <lpage>359</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-540-31865-1_25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bouchouar</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hetman</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Hanley</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of an automated emergency department-based syndromic surveillance system to enhance public health surveillance in Yukon: a lower-resourced and remote setting</article-title>
          <source>BMC Public Health</source>
          <year>2021</year>
          <month>06</month>
          <day>29</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>1247</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-021-11132-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-021-11132-w</pub-id>
          <pub-id pub-id-type="medline">34187423</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-021-11132-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC8240073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ising</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Travers</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Travers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kipp</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Waller</surname>
              <given-names>AE</given-names>
            </name>
          </person-group>
          <article-title>Triage note in emergency department-based syndromic surveillance</article-title>
          <source>Advances in Disease Surveillance</source>
          <year>2006</year>
          <volume>1</volume>
          <fpage>34</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://prod-knowledge-repository.s3-us-gov-west-1.amazonaws.com/abstracts/Abstract_2005_09_Triage_Note_in_Emergency_Department-Based_Syndromic_Surveillance.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prieto-Alhambra</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kostka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Duarte-Salles</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Prats-Uribe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sena</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pistillo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khalid</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Golozar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammari</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Dawoud</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nyberg</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wilcox</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Andryc</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ostropolets</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Areia</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Harle</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Blacketer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Morales</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dorr</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Burn</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Roel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Minty</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>DeFalco</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>de Maeztu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lipori</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Alghoul</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roldán</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Posada</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Banda</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Horcajada</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Kohler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schilling</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Recalde</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Spotnitz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Matheny</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Valveny</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alser</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Casajust</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Schuff</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Seager</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fernández-Bertolín</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fortin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Magoc</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Falconer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Subbian</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Huser</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Galvan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Unraveling COVID-19: a large-scale characterization of 4.5 million COVID-19 cases using CHARYBDIS</article-title>
          <source>Res Sq</source>
          <year>2021</year>
          <month>03</month>
          <day>01</day>
          <fpage>369</fpage>
          <lpage>384</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21203/rs.3.rs-279400/v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.21203/rs.3.rs-279400/v1</pub-id>
          <pub-id pub-id-type="medline">33688639</pub-id>
          <pub-id pub-id-type="pii">rs.3.rs-279400</pub-id>
          <pub-id pub-id-type="pmcid">PMC7941629</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sultan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Altayar</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Siddique</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Davitkov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Feuerstein</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Falck-Ytter</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>El-Serag</surname>
              <given-names>HB</given-names>
            </name>
            <collab>AGA Institute</collab>
          </person-group>
          <article-title>AGA Institute Rapid Review of the Gastrointestinal and Liver Manifestations of COVID-19, Meta-Analysis of International Data, and Recommendations for the Consultative Management of Patients with COVID-19</article-title>
          <source>Gastroenterology</source>
          <year>2020</year>
          <month>07</month>
          <volume>159</volume>
          <issue>1</issue>
          <fpage>320</fpage>
          <lpage>334.e27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32407808"/>
          </comment>
          <pub-id pub-id-type="doi">10.1053/j.gastro.2020.05.001</pub-id>
          <pub-id pub-id-type="medline">32407808</pub-id>
          <pub-id pub-id-type="pii">S0016-5085(20)30593-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC7212965</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tariq</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Furqan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hassett</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pardi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Prevalence and Mortality of COVID-19 Patients With Gastrointestinal Symptoms: A Systematic Review and Meta-analysis</article-title>
          <source>Mayo Clin Proc</source>
          <year>2020</year>
          <month>08</month>
          <volume>95</volume>
          <issue>8</issue>
          <fpage>1632</fpage>
          <lpage>1648</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32753138"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2020.06.003</pub-id>
          <pub-id pub-id-type="medline">32753138</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(20)30600-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7284248</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Spiegel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Digestive Symptoms in COVID-19 Patients With Mild Disease Severity: Clinical Presentation, Stool Viral RNA Testing, and Outcomes</article-title>
          <source>Am J Gastroenterol</source>
          <year>2020</year>
          <month>06</month>
          <volume>115</volume>
          <issue>6</issue>
          <fpage>916</fpage>
          <lpage>923</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32301761"/>
          </comment>
          <pub-id pub-id-type="doi">10.14309/ajg.0000000000000664</pub-id>
          <pub-id pub-id-type="medline">32301761</pub-id>
          <pub-id pub-id-type="pii">00000434-202006000-00021</pub-id>
          <pub-id pub-id-type="pmcid">PMC7172493</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ping</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Clinical Characteristics of COVID-19 Patients With Digestive Symptoms in Hubei, China: A Descriptive, Cross-Sectional, Multicenter Study</article-title>
          <source>Am J Gastroenterol</source>
          <year>2020</year>
          <month>05</month>
          <volume>115</volume>
          <issue>5</issue>
          <fpage>766</fpage>
          <lpage>773</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32287140"/>
          </comment>
          <pub-id pub-id-type="doi">10.14309/ajg.0000000000000620</pub-id>
          <pub-id pub-id-type="medline">32287140</pub-id>
          <pub-id pub-id-type="pii">00000434-202005000-00025</pub-id>
          <pub-id pub-id-type="pmcid">PMC7172492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Filler</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mathew</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Buley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Iliaki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bruno-Murtha</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Osgood</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Christophi</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez-Montero</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kales</surname>
              <given-names>SN</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 symptoms predictive of healthcare workers' SARS-CoV-2 PCR results</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e0235460</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0235460"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0235460</pub-id>
          <pub-id pub-id-type="medline">32589687</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-13207</pub-id>
          <pub-id pub-id-type="pmcid">PMC7319316</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Couper</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Gremel</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Axinn</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Guyer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>West</surname>
              <given-names>BT</given-names>
            </name>
          </person-group>
          <article-title>New options for national population surveys: The implications of internet and smartphone coverage</article-title>
          <source>Soc Sci Res</source>
          <year>2018</year>
          <month>07</month>
          <volume>73</volume>
          <fpage>221</fpage>
          <lpage>235</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ssresearch.2018.03.008</pub-id>
          <pub-id pub-id-type="medline">29793688</pub-id>
          <pub-id pub-id-type="pii">S0049-089X(17)30787-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Clinical Characteristics of 138 Hospitalized Patients With 2019 Novel Coronavirus-Infected Pneumonia in Wuhan, China</article-title>
          <source>JAMA</source>
          <year>2020</year>
          <month>03</month>
          <day>17</day>
          <volume>323</volume>
          <issue>11</issue>
          <fpage>1061</fpage>
          <lpage>1069</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32031570"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2020.1585</pub-id>
          <pub-id pub-id-type="medline">32031570</pub-id>
          <pub-id pub-id-type="pii">2761044</pub-id>
          <pub-id pub-id-type="pmcid">PMC7042881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Docherty</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Hardwick</surname>
              <given-names>HE</given-names>
            </name>
            <name name-style="western">
              <surname>Pius</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Holden</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Read</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Dondelinger</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Carson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Merson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Plotkin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sigfrid</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Halpin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gamble</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Horby</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen-Van-Tam</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Russell</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Dunning</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Openshaw</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Baillie</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Semple</surname>
              <given-names>MG</given-names>
            </name>
            <collab>ISARIC4C investigators</collab>
          </person-group>
          <article-title>Features of 20 133 UK patients in hospital with covid-19 using the ISARIC WHO Clinical Characterisation Protocol: prospective observational cohort study</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>05</month>
          <day>22</day>
          <volume>369</volume>
          <fpage>m1985</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32444460"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.m1985</pub-id>
          <pub-id pub-id-type="medline">32444460</pub-id>
          <pub-id pub-id-type="pmcid">PMC7243036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Talavera</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>García-Azorín</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez-Pías</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Trigo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hernández-Pérez</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Valle-Peñacoba</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Simón-Campo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>de Lera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chavarría-Miranda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>López-Sanz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gutiérrez-Sánchez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez-Velasco</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pedraza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sierra</surname>
              <given-names>Á</given-names>
            </name>
            <name name-style="western">
              <surname>Gómez-Vicente</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Guerrero</surname>
              <given-names>Á</given-names>
            </name>
            <name name-style="western">
              <surname>Arenillas</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Anosmia is associated with lower in-hospital mortality in COVID-19</article-title>
          <source>J Neurol Sci</source>
          <year>2020</year>
          <month>12</month>
          <day>15</day>
          <volume>419</volume>
          <fpage>117163</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/10261/222175"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jns.2020.117163</pub-id>
          <pub-id pub-id-type="medline">33035870</pub-id>
          <pub-id pub-id-type="pii">S0022-510X(20)30499-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC7527278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cevik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tate</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Maraolo</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Schafers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SARS-CoV-2, SARS-CoV, and MERS-CoV viral load dynamics, duration of viral shedding, and infectiousness: a systematic review and meta-analysis</article-title>
          <source>Lancet Microbe</source>
          <year>2021</year>
          <month>01</month>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>e13</fpage>
          <lpage>e22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2666-5247(20)30172-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2666-5247(20)30172-5</pub-id>
          <pub-id pub-id-type="medline">33521734</pub-id>
          <pub-id pub-id-type="pii">S2666-5247(20)30172-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7837230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhussayni</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Alshamery</surname>
              <given-names>ES</given-names>
            </name>
          </person-group>
          <article-title>Automated COVID-19 Dialogue System Using a New Deep Learning Network</article-title>
          <source>PEN</source>
          <year>2021</year>
          <month>04</month>
          <day>13</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>667</fpage>
          <pub-id pub-id-type="doi">10.21533/pen.v9i2.1862</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ancochea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Izquierdo</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Soriano</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Evidence of Gender Differences in the Diagnosis and Management of Coronavirus Disease 2019 Patients: An Analysis of Electronic Health Records Using Natural Language Processing and Machine Learning</article-title>
          <source>J Womens Health (Larchmt)</source>
          <year>2021</year>
          <month>03</month>
          <volume>30</volume>
          <issue>3</issue>
          <fpage>393</fpage>
          <lpage>404</lpage>
          <pub-id pub-id-type="doi">10.1089/jwh.2020.8721</pub-id>
          <pub-id pub-id-type="medline">33416429</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
