<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v4i2e29</article-id>
    <article-id pub-id-type="pmid">29695376</article-id>
    <article-id pub-id-type="doi">10.2196/publichealth.9361</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Clinical Relation Extraction Toward Drug Safety Surveillance Using Electronic Health Record Narratives: Classical Learning Versus Deep Learning</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Torii</surname>
          <given-names>Manabu</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Gonzalez</surname>
          <given-names>Graciela</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Liu</surname>
          <given-names>Mei</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1">
        <name name-style="western">
          <surname>Munkhdalai</surname>
          <given-names>Tsendsuren</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8783-4993</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Liu</surname>
          <given-names>Feifan</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-0881-6365</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3" corresp="yes">
      <name name-style="western">
        <surname>Yu</surname>
        <given-names>Hong</given-names>
      </name>
      <degrees>FACMI, PhD</degrees>
      <xref rid="aff2" ref-type="aff">2</xref>
      <address>
        <institution>Department of Computer Science</institution>
        <institution>University of Massachusetts Lowell</institution>
        <addr-line>1 University Ave</addr-line>
        <addr-line>Lowell, MA, 01854</addr-line>
        <country>United States</country>
        <phone>1 9789343620</phone>
        <fax>1 9789343551</fax>
        <email>hong_yu@uml.edu</email>
      </address>  
      <xref rid="aff3" ref-type="aff">3</xref>
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9263-5035</ext-link></contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>Department of Quantitative Health Sciences</institution>
    <institution>University of Massachusetts Medical School</institution>  
    <addr-line>Worcester, MA</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Department of Computer Science</institution>
    <institution>University of Massachusetts Lowell</institution>  
    <addr-line>Lowell, MA</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
      <sup>3</sup>
      <institution>The Bedford Veterans Affairs Medical Center</institution>
      <addr-line>Bedford, MA</addr-line>
      <country>United States</country>
    </aff>
    <author-notes>
      <corresp>Corresponding Author: Hong Yu 
      <email>hong_yu@uml.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Apr-Jun</season><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>25</day>
      <month>04</month>
      <year>2018</year>
    </pub-date>
    <volume>4</volume>
    <issue>2</issue>
    <elocation-id>e29</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>8</day>
        <month>11</month>
        <year>2017</year>
      </date>
      <date date-type="rev-request">
        <day>9</day>
        <month>12</month>
        <year>2017</year>
      </date>
      <date date-type="rev-recd">
        <day>3</day>
        <month>2</month>
        <year>2018</year>
      </date>
      <date date-type="accepted">
        <day>5</day>
        <month>2</month>
        <year>2018</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Tsendsuren Munkhdalai, Feifan Liu, Hong Yu. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 25.04.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://publichealth.jmir.org/2018/2/e29/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Medication and adverse drug event (ADE) information extracted from electronic health record (EHR) notes can be a rich resource for drug safety surveillance. Existing observational studies have mainly relied on structured EHR data to obtain ADE information; however, ADEs are often buried in the EHR narratives and not recorded in structured data.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>To unlock ADE-related information from EHR narratives, there is a need to extract relevant entities and identify relations among them. In this study, we focus on relation identification. This study aimed to evaluate natural language processing and machine learning approaches using the expert-annotated medical entities and relations in the context of drug safety surveillance, and investigate how different learning approaches perform under different configurations.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We have manually annotated 791 EHR notes with 9 named entities (eg, medication, indication, severity, and ADEs) and 7 different types of relations (eg, medication-dosage, medication-ADE, and severity-ADE). Then, we explored 3 supervised machine learning systems for relation identification: (1) a support vector machines (SVM) system, (2) an end-to-end deep neural network system, and (3) a supervised descriptive rule induction baseline system. For the neural network system, we exploited the state-of-the-art recurrent neural network (RNN) and attention models. We report the performance by macro-averaged precision, recall, and F1-score across the relation types.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>Our results show that the SVM model achieved the best average F1-score of 89.1% on test data, outperforming the long short-term memory (LSTM) model with attention (F1-score of 65.72%) as well as the rule induction baseline system (F1-score of 7.47%) by a large margin. The bidirectional LSTM model with attention achieved the best performance among different RNN models. With the inclusion of additional features in the LSTM model, its performance can be boosted to an average F1-score of 77.35%.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>Our results show that classical learning models (SVM) remain advantageous over deep learning models (RNN variants) for clinical relation identification, especially for long-distance intersentential relations. However, RNNs demonstrate great potential for significant improvement if more training data become available. Our work is an important step toward mining EHRs to improve the efficacy of drug safety surveillance. Most importantly, the annotated data used in this study will be made publicly available, which will further promote drug safety research in the community.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>medical informatics applications</kwd>
      <kwd>drug-related side effects and adverse reactions</kwd>
      <kwd>neural networks</kwd>
      <kwd>natural language processing</kwd>
      <kwd>electronic health records</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Significance</title>
        <p>Prescription drug safety represents a major public health concern [<xref ref-type="bibr" rid="ref1">1</xref>]. An adverse drug event (ADE) is “an injury resulting from medical intervention related to a drug” [<xref ref-type="bibr" rid="ref2">2</xref>]. ADEs are common and occur in approximately 2-5% of hospitalized adult patients [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Each ADE is estimated to increase the length of a hospital stay by more than 2 days and hospital cost by more than US $3200 [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Severe ADEs rank among the top 4 or 6 leading causes of death in the United States [<xref ref-type="bibr" rid="ref7">7</xref>]. Prevention, early detection, and mitigation of ADEs could save both lives and resources [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>Due to the limited number of participants and inclusion or exclusion criteria reflecting specific subject characteristics, premarketing randomized clinical trials frequently miss ADEs [<xref ref-type="bibr" rid="ref1">1</xref>], and thus, postmarketing drug safety surveillance [<xref ref-type="bibr" rid="ref10">10</xref>] is vitally important for health care and patient safety. The Food and Drug Administration (FDA) maintains an adverse event reporting system called the Food and Drug Administration Adverse Event Reporting System for postmarketing safety surveillance, but it faces challenges including underreporting [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>] and missing important patterns of drug exposure [<xref ref-type="bibr" rid="ref13">13</xref>]. Other resources have been shown to be useful for identifying ADEs, including biomedical literature [<xref ref-type="bibr" rid="ref14">14</xref>] and social media [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. However, biomedical literature has been shown to identify mostly a limited set of rare ADEs [<xref ref-type="bibr" rid="ref19">19</xref>]. Social media has its own challenges, such as missing important drug exposure patterns and generalizing systems to deal with data heterogeneity [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>It is well known that electronic health records (EHRs) contain rich ADE information and are an important resource for drug safety surveillance [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Since 2009, the FDA has invested in facilitating the use of routinely collected EHR data to perform active surveillance of the safety of marketed medical products [<xref ref-type="bibr" rid="ref22">22</xref>]. Existing ADE-targeted observational studies have focused on structured EHR data for obtaining ADE information [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]; however, ADEs are often buried in the EHR narratives and not recorded in structured data. Manual abstraction of data from EHR notes [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref26">26</xref>] remains a costly and significant impediment to drug safety surveillance research. Exploring natural language processing (NLP) approaches for efficient, accurate, and automated ADE detection can provide significant cost and logistical advantages over manual chart review or voluntary reporting.</p>
      </sec>
      <sec>
        <title>Mining Clinical Narratives for ADE Detection</title>
        <p>Quite a few NLP approaches have been explored for mining ADE information from unstructured data of the aforementioned sources, such as biomedical literature [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], social media [<xref ref-type="bibr" rid="ref29">29</xref>], FDA event reporting system narratives [<xref ref-type="bibr" rid="ref30">30</xref>], and EHRs [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. The 2009 i2b2 (Informatics for Integrating Biology and the Bedside) medication challenge [<xref ref-type="bibr" rid="ref41">41</xref>] and the 2010 i2b2 relation challenge [<xref ref-type="bibr" rid="ref42">42</xref>] play an important role in promoting methodology advancement in this field. Existing studies are limited to detection only at the document level by identifying discharge summaries that contain ADEs [<xref ref-type="bibr" rid="ref31">31</xref>], or mainly focus on detecting entities representing relevant events (eg, adverse events and medication events) [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], or deal with only intrasentential relations [<xref ref-type="bibr" rid="ref42">42</xref>], or identify relations purely based on statistical association analysis among drug and outcome concepts, which are recognized by mapping free clinical text onto medical terminology [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. Henriksson et al [<xref ref-type="bibr" rid="ref35">35</xref>] explored a traditional random-forest algorithm to identify relations between drugs and disorders (or findings) on Swedish clinical notes, and reported that the intersentential relations are challenging and hard to detect.</p>
        <p>Recently, deep learning with neural networks has received increasing attention in NLP tasks [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], and for relation extraction, the state-of-the-art systems are based on 2 networks: recurrent neural networks (RNNs) [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>] and convolutional neural networks (CNNs) [<xref ref-type="bibr" rid="ref48">48</xref>], and an end-to-end relation extraction model [<xref ref-type="bibr" rid="ref49">49</xref>] obtained competitive performance on several datasets. So far, there is less related work on evaluating deep learning methods on ADE relation extraction. Li et al [<xref ref-type="bibr" rid="ref50">50</xref>] proposed a bidirectional long short-term memory (LSTM) model to extract ADE relations from biomedical literature. As the model is dependent on the parsing of a sentence, it is difficult to apply it to clinical notes, which contain more abbreviations and ungrammatical language expressions. In the clinical domain, Lv et al [<xref ref-type="bibr" rid="ref51">51</xref>] combined an autoencoder with conditional random fields, and Sahu et al [<xref ref-type="bibr" rid="ref52">52</xref>] proposed domain-invariant CNNs for ADE extraction on the i2b2 data. All 3 studies are limited to extracting relations within 1 sentence.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>In this study, we investigate ADE-relevant relation extraction on both intra- and intersentential settings. To this end, we have built a benchmark corpus consisting of clinical notes where medical concepts related to ADE and their relations were annotated via a manual chart review. Then, we experimented with 3 supervised machine learning approaches for ADE relation identification from clinical notes. The first approach is based on rule induction, which is similar to supervised descriptive rule induction [<xref ref-type="bibr" rid="ref53">53</xref>] but is relatively simple. Rules for each relation type are automatically induced based on the corresponding descriptive statistics obtained from the training data, and then those rules are used to classify new entity pairs. Our second approach uses a classical support vector machines (SVM)-based machine learning model. Our third approach is based on deep learning neural networks, which explore RNNs with attention mechanisms. In addition to benchmarking the overall performance, we empirically analyzed how well deep learning models perform in recognizing long-distance relations, and how the training data size affects learning performance on clinical data. Compared with previous studies, the main contributions of this work are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>We build a new annotated benchmark corpus of EHR notes for ADE information extraction. Compared with the existing i2b2 data, this corpus contains much richer annotations related to ADE research, for example, all the medications are profiled with attributes enabling ADE connected to a specific dose of medication (note that many ADEs are caused by high dosage); severity concepts are also annotated and associated with ADEs.</p>
          </list-item>
          <list-item>
            <p>The annotated data in this study will be shared with the community to further promote research for drug safety surveillance.</p>
          </list-item>
          <list-item>
            <p>It is the first attempt to investigate and evaluate modeling 7 heterogeneous clinical relations in a single framework: relations between medication and its attributes, relations between ADE and its severity, relations between medication and ADE, and relations between medication and indication.</p>
          </list-item>
          <list-item>
            <p>We explored RNNs and attention mechanisms for clinical relation extraction beyond sentence boundaries, and investigated how the length between two entities affects the performance for different learning models. To our knowledge, this is the first study of applying deep learning approaches on both inter- and intrasentential relation extraction using EHR data.</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Annotation</title>
        <p>The annotated corpus contains 791 English EHR notes from cancer patients, which were randomly sampled from people who have been diagnosed with hematological malignancy and have drug exposure to one or more of the 12 cancer drugs of interest, including Romidepsin, Rituximab, Brentuximab vedotin, Ponatinib, and Carfilzomib. All the notes are longitudinal and no note type filtering was performed. We manually annotated 8 named entities and 7 relation types among them: <italic>Dosage-Medication, Route-Medication, Frequency-Medication, Duration-Medication, Medication-Indication, Medication-ADE</italic>, and <italic>Severity-ADE</italic>. One named entity that is not involved in relations is “other signs and symptoms.” Our annotation guidelines are an extension of the i2b2 annotation guidelines [<xref ref-type="bibr" rid="ref42">42</xref>] and have been iteratively developed by domain experts. Unlike other clinical corpora that annotate entity relations at the sentence level, we annotated entity relations beyond sentence boundaries. Each EHR note was annotated by at least 2 annotators, and the interannotator agreement of .93 kappa was achieved on our annotations.</p>
        <p>The resulting annotated data consisted of 667,061 tokens, 48,803 entity mentions (61.7 per note), and 16,022 entity relations (20.3 per note). The relation distributions in these datasets are reported in the last column of <xref ref-type="table" rid="table1">Table 1</xref>. <italic>Frequency</italic>, <italic>dosage</italic>, and <italic>indication</italic> are the most frequent relations, whereas <italic>duration</italic> and <italic>adverse</italic> relations are less frequent in the corpus. We split the corpus into 602/95/94 train/develop/test sets.</p>
        <p><xref ref-type="fig" rid="figure1">Figure 1</xref> shows the distribution of relation token distance (the number of tokens between a relation entity mention pair). As shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>, most relations occurred within a window of up to 9 tokens. On the other hand, some relations connected entities across multiple sentences. The average relation token distance was 7, and the maximum distance was 769.</p>
        <p>To formulate the relation identification task, our goal was to learn a function <italic>f</italic> (<italic>x</italic>) that mapped an input entity pair (<italic>e</italic><sub>l</sub>, <italic>e</italic><sub>r</sub>) to a relation type <italic>y</italic> ∈ <italic>Y</italic>, where <italic>Y</italic> is the set of all possible relation types including <italic>None</italic>, which in our system denotes the existence of no relation between an entity pair. An entity <italic>e</italic><sub>i</sub> ∈ <italic>E</italic> is any observed entity mention within a document <italic>d</italic> ∈ <italic>D</italic>. The input entity pair (<italic>e</italic><sub>l</sub>, <italic>e</italic><sub>r</sub>) is sampled from all possible entity pairs <italic>E</italic> × <italic>E</italic> within the document and is labeled with a relation type if a true relation holds for it; otherwise, it is labeled <italic>None</italic>. The mention pair and the document within which that pair occurs form a machine learning example <italic>x</italic> in our task. We implemented and evaluated 3 supervised machine learning approaches as described below, and the experiment workflow is shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Clinical relation types in our corpus. Entity mentions forming relations are in italics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="400"/>
            <col width="400"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td>Description</td>
                <td>Example</td>
                <td>#relations<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td><italic>Dosage</italic></td>
                <td>An attribute of a medication: the amount of the medication to be taken</td>
                <td>She receives <italic>Albuterol 2 puffs</italic> p.o. q4-6h</td>
                <td>2643/336/409</td>
              </tr>
              <tr valign="top">
                <td><italic>Route</italic></td>
                <td>An attribute of a medication: how the medication is administered</td>
                <td>She receives <italic>Albuterol</italic> 2 puffs <italic>p.o.</italic> q4-6h</td>
                <td>1908/269/332</td>
              </tr>
              <tr valign="top">
                <td><italic>Frequency</italic></td>
                <td>An attribute of a medication: frequency of the administration</td>
                <td>She receives <italic>Albuterol</italic> 2 puffs p.o. <italic>q4-6h</italic></td>
                <td>2691/351/451</td>
              </tr>
              <tr valign="top">
                <td><italic>Duration</italic></td>
                <td>An attribute of a medication</td>
                <td>The patient was treated with <italic>ampicillin</italic> for <italic>2 weeks</italic></td>
                <td>493/95/110</td>
              </tr>
              <tr valign="top">
                <td><italic>Indication</italic></td>
                <td>A causal relation between a medication and indication: why the drug is taken</td>
                <td>He later received <italic>chemotherapy</italic> for his <italic>lung cancer</italic></td>
                <td>2301/264/379</td>
              </tr>
              <tr valign="top">
                <td><italic>Adverse Event</italic></td>
                <td>A causal relation between a medication and an injury: the consequence of a medication</td>
                <td>Patient’s death was due to <italic>anaphylactic shock</italic> caused by the intravenously administered <italic>penicillin</italic></td>
                <td>717/134/134</td>
              </tr>
              <tr valign="top">
                <td><italic>Severity</italic></td>
                <td>The attribute of an adverse event</td>
                <td>He has <italic>severe diarrhea</italic></td>
                <td>1505/259/241</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>the number of relations for each type (train/develop/test).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The distribution of relation token distance.</p>
          </caption>
          <graphic xlink:href="publichealth_v4i2e29_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Experimental workflow for adverse drug event (ADE) detection. EHRs: electronic health records; SVM: support vector machines; AE: adverse events.</p>
          </caption>
          <graphic xlink:href="publichealth_v4i2e29_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Induction Rule Baseline</title>
        <p>Our first supervised approach used automatically induced rules from the training data, motivated by the observation that the distance between 2 entities was a potentially strong indicator of their relations. For example, we observed that drug attributes typically followed drug names and, in contrast, the distance between adverse drug events and their drugs was relatively far. Therefore, our rule-induction classifier was based on the token distance between 2 entities.</p>
        <p>Formally, the classifier considered an entity pair (<italic>e</italic><sub>l</sub>, <italic>e</italic><sub>r</sub>) that occurred within a certain distance as a true relation, and the pair was assigned one of the positive relation types, <italic>y</italic> ∈ <italic>Y</italic> \ {<italic>None</italic>}. For training, we calculated the average token distance of the entity pairs for each relation type. We then defined 7 different token distance bins by using these average distances and assigning a single positive relation label to each bin. During prediction, we chose one of the relation labels if the token distance of 2 entities fell in the corresponding bin. For example, if the average token distance for <italic>Severity</italic> relations was 3 and for <italic>Frequency</italic> was 7, we then had 2 bins, { <italic>n</italic> &#124; 0 &#60; <italic>n</italic> ≤ 3} and { <italic>n</italic> &#124; 3 &#60; <italic>n</italic> ≤ 7} (<italic>n</italic> was the token distance). If the token distance <italic>n</italic> between an entity pair was in the first bin, the entity pair was given the label <italic>Severity</italic>; otherwise, it was labeled <italic>Frequency</italic> or <italic>None</italic>. We considered an entity pair as <italic>None</italic> relation if their token distance did not belong to any one of the predefined bins.</p>
      </sec>
      <sec>
        <title>Support Vector Machines System</title>
        <p>We identified a set of rich learning features to build a linear kernel SVM classifier. We chose linear SVM due to its ability to accommodate a large feature space. The features we explored are described below.</p>
        <p><italic>Document-level features</italic> consisted of the frequencies of a specific entity and entity type in a document.</p>
        <p><italic>Relation-specific features</italic> were specific to an entity pair being considered for classification. The features were as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>token distance between the 2 entities</p>
          </list-item>
          <list-item>
            <p>number of clinical entities between the 2 entities</p>
          </list-item>
          <list-item>
            <p><italic>n-grams</italic> (<italic>1, 2, 3-grams</italic>) between the 2 entities</p>
          </list-item>
          <list-item>
            <p><italic>n-grams</italic> (<italic>1, 2, 3-grams</italic>) of surrounding tokens of the 2 entities. The surrounding tokens were within a window size, which was defined empirically in our experiment.</p>
          </list-item>
        </list>
        <p><italic>Entity-level features</italic> defined how likely an individual entity mention was involved in a relation:</p>
        <list list-type="bullet">
          <list-item>
            <p>one-hot encoding of the left entity type, <italic>e</italic><sub>l</sub></p>
          </list-item>
          <list-item>
            <p>one-hot encoding of the right entity type, <italic>e</italic><sub>r</sub></p>
          </list-item>
          <list-item>
            <p>character <italic>n-grams</italic> (<italic>2, 3-grams</italic>) of the named entities.</p>
          </list-item>
        </list>
        <p><italic>Semantic features</italic> were derived using the MetaMap tool from the National Library of Medicine. Specifically, we mapped entity mentions and their surrounding context to their UMLS (Unified Medical Language System) concepts, preferred terms, and semantic types. We renormalized the concept IDs (identifiers) to their corresponding semantic type names and included both shortened and multiword forms of the semantic types in the feature set. We set the window size of the surrounding context to 10 in the MetaMap tool.</p>
        <p><italic>Word representation features</italic> were generated to overcome the data sparsity challenge. We explored word clustering and word vector representation features that have been shown to improve performance for chemical and biomedical named-entity recognition tasks [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. In particular, we used the Brown clustering model and Word Vector Classes as word clustering features and applied raw word embedding as word vector features.</p>
        <p>We trained the Brown cluster model [<xref ref-type="bibr" rid="ref56">56</xref>] on a large collection of biomedical text. We then obtained the cluster label prefixes (ie, the top levels of the cluster hierarchy) with 4, 6, 10, and 20 lengths from the Brown model as features for the context of each entity mention. We empirically set the context window size to 10 in this study. To learn broader contextual information, we also explored the recently introduced skip-gram model [<xref ref-type="bibr" rid="ref57">57</xref>]. The skip-gram model is used to predict the contextual words given an input token, and this yielded a dense word embedding for the token that effectively carried its syntactic and semantic information. We first built a skip-gram model on a large unlabeled text consisting of the PubMed abstracts and the EHRs [<xref ref-type="bibr" rid="ref43">43</xref>], and an additional set of ~2 million PubMed Central full articles. The word embeddings induced by the skip-gram model were then clustered into 300 different groups by using a K-means algorithm to obtain cluster labels that we called Word Vector Classes (WVCs). As with the Brown model features, we mapped the entity mention context to their WVCs and included these WVCs in the feature set. We also used the raw word embedding as word representation features in our model, which provided a fine-grained latent feature of word semantic and syntactic information.</p>
        <p>The character and word <italic>n-grams</italic> were converted into <italic>TF-IDF</italic> (term frequency-inverse document frequency) weights based on the training set. We stored the <italic>TF-IDF</italic> weights and used them to extract features from the development and test sets. We did not involve the development and test sets in the <italic>n-gram</italic> extraction and the <italic>TF-IDF</italic> calculation to ensure that our models and the features were not biased. We did not extract any sentence-specific features, which allowed us to classify intra- and intersentential relations jointly with a single SVM model.</p>
      </sec>
      <sec>
        <title>End-to-End Deep Neural Networks</title>
        <p>We explored LSTM and attention-based neural network methods to classify clinical relations in an end-to-end fashion [<xref ref-type="bibr" rid="ref58">58</xref>] without feature engineering. The reason behind this choice is based on reported advantages of RNNs over CNNs in relation extraction tasks [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>].</p>
        <p>LSTM is a variation of RNN models and was introduced to solve the gradient vanishing problem [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>]. It can model long-term dependencies with its internal memory, and it achieved notable success with NLP tasks including machine translation [<xref ref-type="bibr" rid="ref63">63</xref>], speech recognition [<xref ref-type="bibr" rid="ref64">64</xref>], and textual entailment recognition [<xref ref-type="bibr" rid="ref65">65</xref>]. The LSTM can effectively learn vector representations for various levels of linguistic units to facilitate different classification tasks. The attention mechanism can help LSTM construct a better representation by selecting important context in an EHR document. As it is computationally expensive to use the whole document for learning the representations, we focused on text windows associated with the 2 entities in our model.</p>
        <p>Let <italic>x</italic><sub>t</sub>, <italic>h</italic><sub>t</sub>, and <italic>c</italic><sub>t</sub> be the input, output, and cell state, respectively, at time step <italic>t</italic>. Given a window of token representations (ie, word embeddings) <italic>x</italic><sub>1</sub>,…, <italic>x</italic><sub>l</sub> (<italic>x</italic><sub>l</sub> is the head token for the entity <italic>e</italic><sub>l</sub> and <italic>l</italic> is the window size), an LSTM with hidden size <italic>k</italic> computes a sequence of the outputs <italic>h</italic><sub>1</sub>,…, <italic>h</italic><sub>l</sub> and another sequence of the cell states <italic>c</italic><sub>1</sub>,…, <italic>c</italic><sub>l</sub> as:</p>
        <p><italic>i</italic><sub>t</sub>= σ (<italic>W</italic><sub>1</sub><sup>lstm</sup><italic>x</italic><sub>t</sub>+ <italic>W</italic><sub>2</sub><sup>lstm</sup><italic>h</italic><sub>t-1</sub>+ <italic>b</italic><sub>1</sub><sup>lstm</sup>) (1)</p>
        <p><italic>i</italic><sub>t</sub><sup>'</sup>= <italic>tanh</italic> (<italic>W</italic><sub>3</sub><sup>lstm</sup><italic>x</italic><sub>t</sub>+ <italic>W</italic><sub>4</sub><sup>lstm</sup><italic>h</italic><sub>t-1</sub>+ <italic>b</italic><sub>2</sub><sup>lstm</sup>) (2)</p>
        <p><italic>f</italic><sub>t</sub>= σ (<italic>W</italic><sub>5</sub><sup>lstm</sup><italic>x</italic><sub>t</sub>+ <italic>W</italic><sub>6</sub><sup>lstm</sup><italic>h</italic><sub>t-1</sub>+ <italic>b</italic><sub>3</sub><sup>lstm</sup>) (3)</p>
        <p><italic>o</italic><sub>t</sub>= σ (<italic>W</italic><sub>7</sub><sup>lstm</sup><italic>x</italic><sub>t</sub>+ <italic>W</italic><sub>8</sub><sup>lstm</sup><italic>h</italic><sub>t-1</sub>+ <italic>b</italic><sub>4</sub><sup>lstm</sup>) (4)</p>
        <p><italic>c</italic><sub>t</sub> = <italic>f</italic><sub>t</sub> ⊙ <italic>c</italic><sub>t-1</sub> + <italic>i</italic><sub>t</sub> ⊙ <italic>i</italic><sub>t</sub><sup>'</sup> (5)</p>
        <p><italic>h</italic><sub>t</sub> = <italic>o</italic><sub>t</sub> ⊙ <italic>tanh</italic> (<italic>c</italic><sub>t</sub>) (6)</p>
        <p>where <italic>W</italic><sub>1</sub><sup>lstm</sup>,…, <italic>W</italic><sub>8</sub><sup>lstm</sup> ∈ <italic>R</italic><sup>k×k</sup> and <italic>b</italic><sub>1</sub><sup>lstm</sup>,…, <italic>b</italic><sub>4</sub><sup>lstm</sup> ∈ <italic>R</italic><sup>k</sup> are the training parameters, and σ and ⊙ denote the element-wise sigmoid function and the element-wise vector multiplication, respectively.</p>
        <p>As described by the equations, the memory cell <italic>c</italic><sub>t</sub> and hidden state <italic>h</italic><sub>t</sub> were updated by reading a word token <italic>x</italic><sub>t</sub> at a time. The memory cell <italic>c</italic><sub>t</sub> then learns to remember the contextual information that is relevant to the entity mention. This information is then provided to the hidden state <italic>h</italic><sub>t</sub> by using a gating mechanism, and the last hidden state <italic>h</italic><sub>l</sub> summarizes all the relevant information for the sequence. <italic>i</italic><sub>t</sub>, <italic>f</italic><sub>t</sub>, and <italic>o</italic><sub>t</sub> are called gates whose values are defined by the nonlinear combination of the previous hidden state <italic>h</italic><sub>t-1</sub> and the current input token <italic>x</italic><sub>t</sub> and range from 0 to 1. The input gate <italic>i</italic><sub>t</sub> controls how much information needs to flow into the memory cell, whereas the forget gate <italic>f</italic><sub>t</sub> decides what information needs to be erased from the memory cell. The output <italic>o</italic><sub>t</sub> finally produces the hidden state for the current input token.</p>
        <p>We further used the output <italic>h</italic><sub>l</sub> and <italic>h</italic><sub>r</sub> corresponding to the input token heads of the entity pair <italic>e</italic><sub>l</sub> and <italic>e</italic><sub>r</sub> as the entity representations. The representation <italic>h</italic><sub>r</sub> for entity <italic>e</italic><sub>r</sub> was obtained similarly by reading its token window with another LSTM. The representations <italic>h</italic><sub>l</sub> and <italic>h</italic><sub>r</sub> were then composed by using a function <italic>g</italic> (<italic>h</italic><sub>l</sub>, <italic>h</italic><sub>r</sub>) to produce a relation representation <italic>r</italic><sub>lr</sub>. We used a multilayered perceptron (MLP) with a concatenated input for <italic>g</italic> (<italic>h</italic><sub>l</sub>, <italic>h</italic><sub>r</sub>) in our model, defined as:</p>
        <p><italic>r</italic><sub>lr</sub>= <italic>g</italic> (<italic>h</italic><sub>l</sub>, <italic>h</italic><sub>r</sub>) (7)</p>
        <p><italic>g</italic> (<italic>h</italic><sub>l</sub>, <italic>h</italic><sub>r</sub>) = <italic>tanh</italic> (<italic>W</italic><sup>mlp</sup>[<italic>h</italic><sub>l</sub>; <italic>h</italic><sub>r</sub>] + <italic>b</italic><sup>mlp</sup>) (8)</p>
        <p>where [<italic>h</italic><sub>l</sub>; <italic>h</italic><sub>r</sub>] is the concatenation operation, <italic>W</italic><sup>mlp</sup> ∈ <italic>R</italic><sup>k×&#124;Y&#124;</sup> is the projection matrix, and <italic>b</italic><sup>mlp</sup> ∈ <italic>R</italic><sup>&#124;Y&#124;</sup> is the bias vector trained from the data. Finally, the relation representation <italic>r</italic><sub>lr</sub> was input to the <italic>softmax</italic> layer to normalize the probability distribution over possible relation types <italic>Y</italic>. The whole network was trained by a backpropagation algorithm by minimizing the cross-entropy loss between the predicted probabilities and the correct labels.</p>
        <p>We also experimented with an LSTM with the attention mechanism, which is expected to solve the issue of the information bottleneck in RNNs [<xref ref-type="bibr" rid="ref66">66</xref>]. When RNNs process long text, they encounter a practical difficulty; they must compress the text into a single vector with a fixed size. The purpose of the attention mechanism is to exploit the task-relevant outputs in the past time scales and the current output vector to dynamically refine the final vector representation so that the constructed representation becomes more informative.</p>
        <p>We used a standard global attention, which has been shown to be state-of-the-art in a variety of NLP tasks: machine translation [<xref ref-type="bibr" rid="ref66">66</xref>], question answering [<xref ref-type="bibr" rid="ref67">67</xref>], textual entailment [<xref ref-type="bibr" rid="ref68">68</xref>], and constituency parsing [<xref ref-type="bibr" rid="ref69">69</xref>]. In addition to the last output vectors <italic>h</italic><sub>l</sub> and <italic>h</italic><sub>r</sub>, the global attention explicitly considered all the previous output vectors <italic>h</italic><sub>1</sub>,…, <italic>h</italic><sub>l-1</sub> and <italic>h</italic><sub>1</sub>,…, <italic>h</italic><sub>r-1</sub> to construct attention-weighted representations of the entities <italic>e</italic><sub>l</sub> and <italic>e</italic><sub>r</sub>.</p>
        <p>Concretely, let <italic>S</italic> ∈ <italic>R</italic><sup>k×l</sup> be a matrix of the output vectors <italic>h</italic><sub>1</sub>,…, <italic>h</italic><sub>l</sub> and <italic>o</italic><sub>l</sub> ∈ <italic>R</italic><sup>l</sup> be a vector of ones. An attention weight vector <italic>a</italic>, an attention representation <italic>z</italic>, and the final entity representation <italic>h</italic><sub>l</sub><sup>’</sup> were defined as:</p>
        <p><italic>M</italic> = <italic>tanh</italic> (<italic>W</italic><sub>1</sub><sup>at</sup><italic>S</italic> + <italic>W</italic><sub>2</sub><sup>at</sup><italic>h</italic><sub>l</sub>⊕ <italic>o</italic><sub>l</sub>) (9)</p>
        <p><italic>a</italic> = <italic>softmax</italic> (<italic>w</italic><sup>T</sup><italic>M</italic>) (10)</p>
        <p><italic>z</italic> = <italic>Sa</italic><sup>T</sup> (11)</p>
        <p><italic>h</italic><sub>l</sub><sup>’</sup>= <italic>tanh</italic> (<italic>W</italic><sub>3</sub><sup>at</sup><italic>z</italic> + <italic>W</italic><sub>4</sub><sup>at</sup><italic>h</italic><sub>l</sub>) (12)</p>
        <p>where <italic>W</italic><sub>1</sub><sup>at</sup>, <italic>W</italic><sub>2</sub><sup>at</sup>, <italic>W</italic><sub>3</sub><sup>at</sup>, <italic>W</italic><sub>4</sub><sup>at</sup> ∈ <italic>R</italic><sup>k×k</sup> are learnable matrices and <italic>w</italic><sup>T</sup> is the transpose of the learnable vector <italic>w</italic> ∈ <italic>R</italic><sup>k</sup>. With the outer product <italic>W</italic><sub>2</sub><sup>at</sup><italic>h</italic><sub>l</sub>⊕ <italic>o</italic><sub>l</sub>, we repeated the transformed vector of <italic>h</italic><sub>l</sub> <italic>l</italic> times and then combined the resulting matrix with the projected output vectors. The entity representation <italic>h</italic><sub>r</sub><sup>’</sup> for entity <italic>e</italic><sub>r</sub> was obtained similarly. As for the LSTM-based relation representation, the compositions of the representations were input to an MLP for relation classification.</p>
        <p>We also used the bidirectional version of the aforementioned models by feeding concatenated outputs of the forward and backward LSTM. Due to the concatenated outputs, the sizes of the <italic>W</italic> matrices and <italic>w</italic> vector now become 2<italic>k</italic> × 2<italic>k</italic> and 2<italic>k</italic>, respectively, increasing the number of parameters to be trained. We have previously shown that bidirectional LSTM outperformed the LSTM models for medication and adverse drug event named-entity recognition tasks in EHRs [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
      </sec>
      <sec>
        <title>Experimental Setup and Evaluation Metrics</title>
        <p>As noted previously, we split the corpus into 602/95/94 train/development/test sets. To cast the task as a multiclass classification problem, we generated <italic>None</italic> relations (negative examples) by replacing one of the entity mentions of a true relation with another entity. In doing so, the only constraint was that the new relation should not exist in the true relation corpus set and the rest should be learned from the data. This process gave us additional negative relation instances of 1,190,328/144,338/202,065 for the train/development/test sets, respectively. For this SVM model, we carried out a grid search over its hyperparameters by using the development set for evaluation. Once the best parameters were found, the final SVM model was learned using the optimized hyperparameters on both the training and development sets.</p>
        <p>We used ADAM (adaptive moment estimation) [<xref ref-type="bibr" rid="ref70">70</xref>] for optimization of the neural models. The size of the LSTM hidden units was set to 100. An additional layer was used to map word vectors to the LSTM input. We used a pretrained word2vec model with a size of 300 [<xref ref-type="bibr" rid="ref43">43</xref>] for word embedding. All neural models were regularized by using 20% input and 30% output dropouts [<xref ref-type="bibr" rid="ref71">71</xref>] and an <italic>l</italic><sub>2</sub> regularizer with strength value 1e-3. The neural models were trained only on the training set. We used the development set to evaluate them for each epoch to choose the best model. The unidirectional models were given 30 epochs and the attentional and bidirectional models were given 60 epochs to converge to an optimum. The final performance of the methods was reported and compared by using the test set.</p>
        <p>Our experiment was guided by macro-averaged precision, recall, and <italic>F</italic><sub>1</sub>-score in terms of positive relation types. False negatives (<italic>FN</italic>) and false positives (<italic>FP</italic>) are incorrect negative and positive predictions, respectively. True positive (<italic>TP</italic>) results correspond to positive predictions that were actually correct. Recall (<italic>r</italic>) denotes the percentage of correctly labeled positive results over all positive cases and is calculated as: <italic>r=TP/(TP+FN).</italic> Precision (<italic>p</italic>) is the percentage of correctly labeled positive results over all positive-labeled examples and is calculated as: <italic>p=TP/(TP+FP).</italic> The <italic>F</italic><sub>1</sub>-measure is the harmonic average of precision and recall, and a balanced <italic>F</italic><sub>1</sub>-score is expressed as: <italic>F</italic><sub>1</sub><italic>=2pr/(p+r).</italic></p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>This section presents the results of implementing our relation identification systems. We analyzed the performance of each model and the effects of their free parameters.</p>
      <sec>
        <title>The Rule Induction Baseline</title>
        <p>For this baseline, the distance bins were defined by using the training data. If the token distance of an entity pair did not belong to any of the bins, it was labeled as a <italic>None</italic> relation. This baseline achieved a 7.47% overall F1-score on the test set. Detailed results are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The performance was low, as the method was very simple. The <italic>Dosage</italic> relation type achieved the highest F1-score (30%) among different relations.</p>
      </sec>
      <sec>
        <title>Support Vector Machines–Based Pipeline System</title>
        <p>We performed down-sampling for the negative relations (<italic>None</italic> relations) with varying keep rates to study how the performance changed for different distributions of <italic>None</italic> examples involved in the training set. The development and test sets were kept the same.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> reports the overall F1-score of our SVM model. A higher keep rate means that we used more negative relations in the training set; in our experiment, a higher keep rate yielded a better result on the test set. We obtained the highest performance with the keep rate value equal to 80% in our SVM model. The training set for this run consisted of 1,096,600 instances, of which 964,520 were <italic>None</italic> relations. In <xref ref-type="table" rid="table4">Table 4</xref>, we show the detailed performance metrics for this model for each relation type when evaluated on the test set. The F1-scores for most relation types were over 80% with <italic>Route</italic> relation achieving the best of 96%, and the recall of our clinical relation extractor was relatively high. However, the performance of the <italic>Indication</italic> and <italic>Adverse</italic> relations was not as high as that of the other relations, and <italic>Indication</italic> showed the worst score of 75%. We observed that the 2 entities forming these types of relations tended to be far away from each other and to span multiple sentences (the average token distance was 19 and 14, and the maximum was 518 and 769). The long distance makes these relations more difficult to detect than other relations.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Results (%) of rule induction classifier on test set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F1-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>None</td>
                <td>100</td>
                <td>94</td>
                <td>97</td>
              </tr>
              <tr valign="top">
                <td>Dosage</td>
                <td>20</td>
                <td>63</td>
                <td>30</td>
              </tr>
              <tr valign="top">
                <td>Route</td>
                <td>7</td>
                <td>31</td>
                <td>11</td>
              </tr>
              <tr valign="top">
                <td>Frequency</td>
                <td>2</td>
                <td>7</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>1</td>
                <td>4</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>Indication</td>
                <td>1</td>
                <td>14</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Adverse</td>
                <td>1</td>
                <td>24</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Overall</td>
                <td>4.57</td>
                <td>20.42</td>
                <td>7.47</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Overall F1-scores (%) of support vector machines system. Keep rate for negative down-sampling is varied.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Keep rate</td>
                <td>Train</td>
                <td>Development</td>
                <td>Test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.1</td>
                <td>99.99</td>
                <td>99.97</td>
                <td>82.46</td>
              </tr>
              <tr valign="top">
                <td>0.3</td>
                <td>99.96</td>
                <td>99.93</td>
                <td>87.84</td>
              </tr>
              <tr valign="top">
                <td>0.5</td>
                <td>99.94</td>
                <td>99.86</td>
                <td>89.0</td>
              </tr>
              <tr valign="top">
                <td>0.8</td>
                <td>99.89</td>
                <td>99.8</td>
                <td><italic>89.1</italic><sup>a</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Best score on test data is highlighted in italics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>End-to-End Deep Neural Networks</title>
        <p>We also examined the performance of the neural network models. Notably, by leveraging recent advances in deep learning, including efficient representation learning and attention mechanisms, we addressed the problem without any hand-engineered features.</p>
        <p>As stated earlier in the Methods section, we used a free parameter window size to determine how much local context is considered for entity representation in neural network models. We first examined the effect of this parameter by training the unidirectional LSTM-based model that was the least complex and the fastest to train and to test. The keep rate for down-sampling was set to 0.1 and the window sizes 5, 10, 30, 50, and 70 were studied. <xref ref-type="table" rid="table5">Table 5</xref> presents the results.</p>
        <p>When we considered more context with a larger token window, the performance of the LSTM-based relation extractor improved. However, there appeared to be a small drop starting at the point where size is equal to 50, suggesting that a large window size may introduce contextual noise into the model. In addition, the training and test time dramatically increased with the large windows; therefore, we set the window size to 30 in our experiments, unless otherwise specified.</p>
        <p>We conducted a similar group of experiments to observe how the different down-sampling rates affected the model learning. Again, we used an LSTM-based model to report the results, because it was the least complex and fastest to train. The results are presented in <xref ref-type="table" rid="table6">Table 6</xref>. This time we observed a different pattern of results. The training error kept decreasing as we included more negative examples in the training set. However, with the keep rate of 0.8, it started showing decreasing performance on the development and the test sets. We used a down-sampling keep rate of 0.5 throughout the experiment.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Results (%) of the best performing support vector machines model on test set. Keep rate=0.8.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F1-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>None</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>Dosage</td>
                <td>85</td>
                <td>91</td>
                <td>88</td>
              </tr>
              <tr valign="top">
                <td>Route</td>
                <td>96</td>
                <td>97</td>
                <td>96</td>
              </tr>
              <tr valign="top">
                <td>Frequency</td>
                <td>93</td>
                <td>97</td>
                <td>95</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>89</td>
                <td>93</td>
                <td>91</td>
              </tr>
              <tr valign="top">
                <td>Indication</td>
                <td>72</td>
                <td>77</td>
                <td>75</td>
              </tr>
              <tr valign="top">
                <td>Adverse</td>
                <td>85</td>
                <td>84</td>
                <td>85</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>95</td>
                <td>94</td>
                <td>95</td>
              </tr>
              <tr valign="top">
                <td>Overall</td>
                <td>87.85</td>
                <td>90.42</td>
                <td>89.1</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Overall F1-score (%) of the long short-term memory (LSTM)–based model. Keep rate=0.1.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Window size</td>
                <td>Train</td>
                <td>Development</td>
                <td>Test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>5</td>
                <td>24.05</td>
                <td>14.09</td>
                <td>14.58</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>23.92</td>
                <td>14.85</td>
                <td>14.56</td>
              </tr>
              <tr valign="top">
                <td>30</td>
                <td>37.40</td>
                <td>21.77</td>
                <td><italic>22.59</italic><sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>50</td>
                <td>32.1</td>
                <td>17.15</td>
                <td>18.43</td>
              </tr>
              <tr valign="top">
                <td>70</td>
                <td>27.62</td>
                <td>15.04</td>
                <td>15.93</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Best score on test data is highlighted in italics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Overall F1-score (%) of the long short-term memory (LSTM)–based model. Keep rate for negative down-sampling is varied. Window size=10.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Keep rate</td>
                <td>Train</td>
                <td>Development</td>
                <td>Test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.1</td>
                <td>23.92</td>
                <td>14.85</td>
                <td>14.56</td>
              </tr>
              <tr valign="top">
                <td>0.3</td>
                <td>38.91</td>
                <td>35.18</td>
                <td>37.21</td>
              </tr>
              <tr valign="top">
                <td>0.5</td>
                <td>51.25</td>
                <td>39.02</td>
                <td><italic>39.45</italic><sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>0.8</td>
                <td>24.82</td>
                <td>23.65</td>
                <td>21.11</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>Best score on test data is highlighted in italics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Overall F1-score (%) of long short-term memory (LSTM) and attention-based models. Keep rate=0.5, window size=30.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Train</td>
                <td>Development</td>
                <td>Test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LSTM<sup>a</sup></td>
                <td>54.47</td>
                <td>41.43</td>
                <td>42.32</td>
              </tr>
              <tr valign="top">
                <td>Bidirectional LSTM</td>
                <td>86.56</td>
                <td>66.47</td>
                <td>62.79</td>
              </tr>
              <tr valign="top">
                <td>LSTM + Attention</td>
                <td>68.69</td>
                <td>52.71</td>
                <td>54.21</td>
              </tr>
              <tr valign="top">
                <td>Bidirectional LSTM + Attention</td>
                <td>83.71</td>
                <td>68.95</td>
                <td><italic>65.72</italic><sup>b</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>LSTM: Long short-term memory.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>The best score on test data is highlighted in italics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Results (%) of the best-performing neural model (Bidirectional long short-term memory [LSTM] + Attention) on test set. Keep rate=0.5, window size=30.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F1-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>None</td>
                <td>100</td>
                <td>100</td>
                <td>100</td>
              </tr>
              <tr valign="top">
                <td>Dosage</td>
                <td>78</td>
                <td>80</td>
                <td>79</td>
              </tr>
              <tr valign="top">
                <td>Route</td>
                <td>67</td>
                <td>78</td>
                <td>72</td>
              </tr>
              <tr valign="top">
                <td>Frequency</td>
                <td>61</td>
                <td>76</td>
                <td>68</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>54</td>
                <td>69</td>
                <td>61</td>
              </tr>
              <tr valign="top">
                <td>Indication</td>
                <td>32</td>
                <td>32</td>
                <td>32</td>
              </tr>
              <tr valign="top">
                <td>Adverse</td>
                <td>78</td>
                <td>46</td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>77</td>
                <td>93</td>
                <td>84</td>
              </tr>
              <tr valign="top">
                <td>Overall</td>
                <td>63.85</td>
                <td>67.71</td>
                <td>65.72</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p><xref ref-type="table" rid="table7">Table 7</xref> shows the performance of variations of the neural models, including the attention-based and the bidirectional LSTM-based relation extractors. The attention-based models always performed better than their corresponding LSTM-based extractors. Furthermore, the bidirectional networks achieve much higher performance than the unidirectional ones. The bidirectional LSTM-based model yielded the highest F1 training score. However, without the attention mechanism, this model appears to be overfitting. The best performance we obtained on the test set was a 65.72% overall F1-score for positive relation types, which was lower than the one we reported with SVM models. <xref ref-type="table" rid="table8">Table 8</xref> shows the detailed test performance measures of the best-performing neural model (bidirectional LSTM + attention) for each relation type. Most of the relation types had F1-scores above 70%, and <italic>Severity</italic> relation achieved the best performance of 84%. However, the scores for <italic>Indication, Adverse,</italic> and <italic>Duration</italic> relations were relatively low, with the <italic>Indication</italic> score being the lowest of 32%, which is consistent with SVM models. Nevertheless, the overall result is still promising, given the fact that no feature engineering was conducted and that the training set had only hundreds of examples.</p>
        <p>For SVM models, we performed an efficient grid search over hyper-parameters, and this boosted performance substantially. However, we were not able to do the same for neural network models due to their computational complexity. Instead, we were able to perform a small random search for neural network parameters.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The bidirectional LSTM model with attention achieved the best performance among all the RNN variations, and additional features are shown to help boost the system performance. The SVM model yields the best results, outperforming RNN models, but RNN models demonstrate great potential for significant improvement with more annotated data available.</p>
        <p>Both the classic feature engineering-based SVM pipeline and the end-to-end neural network methods have advantages. The SVM model is able to exploit high-dimensional sparse representation (ie, <italic>TF-IDF</italic>), which has traditionally proven to be efficient in clinical NLP tasks. On the other hand, the neural model relies on dense low-dimensional representations that can possibly be constructed in unsupervised fashion from a large unlabeled text, eluding the complicated feature engineering efforts.</p>
        <p>However, the neural models have a large number of training parameters that are tuned during training and are able to learn from a much larger dataset for better performance. For example, our bidirectional LSTM model has 1.4 million training parameters, so tuning this parameter set requires a large amount of data. Unfortunately, it is not trivial to obtain such labeled data in the clinical and biomedical domains. Our training data used in the experiments had hundreds of examples per relation type, which was a very small fraction compared with the bidirectional LSTM training parameters. In general, this is a disadvantage of deep learning approaches, and we empirically validated this in our ADE relation identification tasks. In low-resource domains, such as the medical domain explored in this study, the focus of future work needs to be on data-efficient deep learning methods. In addition, the SVM relation extractor is easy to train and is robust with a small dataset. Training of the neural network-based relation extractor requires a graphics processing unit (GPU) and is computationally expensive. For example, 60 epochs of our attention model took 26 hours to complete on a GeForce GTX 980 GPU.</p>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>We analyzed how well the SVM and attention models performed on short- and long-distance relations. <xref ref-type="fig" rid="figure3">Figure 3</xref> plots the test F1-score of these models against relation distance. The bidirectional LSTM with attention did not perform well on long-distance relations, and it was not stable. In contrast, SVM was very stable and performed well for those relations where the distances between the entities are long. Interestingly, the neural network performance decreased to 87% from 100% when the distance was 1100. The performance drop was due to false positives, and the generated negative examples were classified as positive by the model. However, these were the simple cases that even our rule induction classifier was able to easily detect. Therefore, we hypothesize that the neural network makes this obvious mistake because the context features, such as relation representations the model relies on, are not sufficient for the task. To verify this, we included a set of additional features in the neural network model. The token and mention distances and mention type features (in SVM models) were embedded and further used along with the dense-vector relation representations for classification.</p>
        <p>By including these additional features in the neural model, we improved its best result from a 65.72% to a 77.35% F1-score. <xref ref-type="table" rid="table9">Table 9</xref> provides a horizontal comparison of the different methods proposed in this paper. Inclusion of those features in the neural model yielded an approximately 12% improvement, and the performance gap between the neural model and SVM model was also reduced.</p>
        <p>We also conducted a set of experiments to show how the training data size affects the overall performance of the SVM and neural models. We created new training sets with stratified sampling rates of 20%, 40%, 60%, and 80% of the original training data. Both SVM and attention-based bidirectional LSTM models were trained on the new training sets and evaluated on the test data. In <xref ref-type="fig" rid="figure4">Figure 4</xref>, we display the test F1-scores of the models for different sample sizes. The SVM model achieved an F1-score greater than 80% even when trained on 20% of the data, but the performance of the neural model was only around 62%. This demonstrates that the feature engineering approach may be preferred over deep learning models when less annotated data are available, as the hand-crafted features in the SVM model have encoded human knowledge, such as domain knowledge and various heuristics.</p>
        <p>However, as the training dataset is increased, we can observe a steady improvement in the performance of the neural models. When we increased the training sample size from 20% to 80%, the neural model improved the test performance from ~62% to ~76%, an absolute gain of about 14 percentage points, whereas the improvement range for the SVM model was much smaller, around 8% F1-score. Therefore, the neural model has the potential to improve substantially if a larger training dataset is available.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One limitation of this study is that the size of the data in the experiment is relatively small, and more follow-up study is needed to further verify the findings on a larger dataset or other publicly available datasets (eg, i2b2 data although they only contain intrasentential relations) by exploring more RNN or CNN architectures, which we will investigate in our future work. In addition, the global attention in our LSTM model may not be sufficient to pinpoint important local context, especially for long-distance relations, and it is worth exploring more flexible attention mechanisms on this task.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Test F1-score over relation distance. BiLSTM: bidirectional long short-term memory; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="publichealth_v4i2e29_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Test F1-score over varying training sample size. BiLSTM: bidirectional long short-term memory; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="publichealth_v4i2e29_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Comparison of different models in terms of overall F1-score.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="250"/>
            <col width="250"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Train</td>
                <td>Development</td>
                <td>Test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Rule induction classifier</td>
                <td>8.33</td>
                <td>8.74</td>
                <td>7.47</td>
              </tr>
              <tr valign="top">
                <td>Bidirectional LSTM<sup>a</sup></td>
                <td>86.56</td>
                <td>66.47</td>
                <td>62.79</td>
              </tr>
              <tr valign="top">
                <td>Bidirectional LSTM + Attention</td>
                <td>83.71</td>
                <td>68.95</td>
                <td>65.72</td>
              </tr>
              <tr valign="top">
                <td>Bidirectional LSTM + Attention + Features</td>
                <td>88.14</td>
                <td>77.77</td>
                <td>77.35</td>
              </tr>
              <tr valign="top">
                <td>SVM<sup>b</sup> + Features</td>
                <td>87.85</td>
                <td>90.42</td>
                <td><italic>89.1</italic><sup>c</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>LSTM: Long short-term memory.</p>
            </fn>
            <fn id="table9fn2">
              <p><sup>b</sup>SVM: support vector machines.</p>
            </fn>
            <fn id="table9fn3">
              <p><sup>c</sup>The best score on test data is highlighted in italics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we created a new expert-annotated EHR corpus in the context of ADE relation identification, which will become a valuable resource and benchmark in the drug safety surveillance research community. We then explored 3 different supervised machine learning models with different levels of complexity to identify 7 types of ADE-related clinical relations. Our results show that the SVM model with a rich feature set achieved the highest performance, surpassing both the rule induction model and the RNN models. The bidirectional LSTM model with attention achieved the best performance among the RNN models, and the additional features are shown to help boost the system performance. However, its performance remains substantially inferior to the performance of the SVM model, although RNN models demonstrate great potential for significant improvement with more annotated data available. Our results indicate that a rich feature set remains crucial for relation identification in clinical text, especially when the training size is small.</p>
        <p>In the future, we will further explore different deep learning architectures (eg, multikernel CNNs, hierarchical RNNs, multilevel attentions) on this task for improved performance. Then, we plan to apply our system to EHRs on a large scale and derive meaningful insights to facilitate efficient and effective drug safety surveillance.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADE</term>
          <def>
            <p>adverse drug event</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNNs</term>
          <def>
            <p>convolutional neural networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FDA</term>
          <def>
            <p>Food and Drug Administration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">GPU</term>
          <def>
            <p>graphics processing unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">MLP</term>
          <def>
            <p>multilayered perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">SVM</term>
          <def>
            <p>support vector machines</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">WVCs</term>
          <def>
            <p>Word Vector Classes</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the grant R01HL125089 from the National Institutes of Health. Any opinions, findings, and conclusions or recommendations expressed in this paper are those of the authors and do not necessarily reflect those of the sponsor.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Haas</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Iyer</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Orav</surname>
            <given-names>EJ</given-names>
          </name>
          <name name-style="western">
            <surname>Schiff</surname>
            <given-names>GD</given-names>
          </name>
          <name name-style="western">
            <surname>Bates</surname>
            <given-names>DW</given-names>
          </name>
        </person-group>
        <article-title>Participation in an ambulatory e-pharmacovigilance system</article-title>
        <source>Pharmacoepidemiol Drug Saf</source>  
        <year>2010</year>  
        <month>09</month>  
        <volume>19</volume>  
        <issue>9</issue>  
        <fpage>961</fpage>  
        <lpage>9</lpage>  
        <pub-id pub-id-type="doi">10.1002/pds.2006</pub-id>
        <pub-id pub-id-type="medline">20623512</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bates</surname>
            <given-names>DW</given-names>
          </name>
          <name name-style="western">
            <surname>Cullen</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Laird</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Petersen</surname>
            <given-names>LA</given-names>
          </name>
          <name name-style="western">
            <surname>Small</surname>
            <given-names>SD</given-names>
          </name>
          <name name-style="western">
            <surname>Servi</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Laffel</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Sweitzer</surname>
            <given-names>BJ</given-names>
          </name>
          <name name-style="western">
            <surname>Shea</surname>
            <given-names>BF</given-names>
          </name>
          <name name-style="western">
            <surname>Hallisey</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Incidence of adverse drug events and potential adverse drug events. Implications for prevention. ADE Prevention Study Group</article-title>
        <source>J Am Med Assoc</source>  
        <year>1995</year>  
        <month>07</month>  
        <day>05</day>  
        <volume>274</volume>  
        <issue>1</issue>  
        <fpage>29</fpage>  
        <lpage>34</lpage>  
        <pub-id pub-id-type="medline">7791255</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Classen</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Pestotnik</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Scott</surname>
            <given-names>ER</given-names>
          </name>
          <name name-style="western">
            <surname>Lloyd</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Burke</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Adverse drug events in hospitalized patients. Excess length of stay, extra costs, and attributable mortality</article-title>
        <source>J Am Med Assoc</source>  
        <year>1997</year>  
        <volume>277</volume>  
        <issue>4</issue>  
        <fpage>301</fpage>  
        <lpage>306</lpage>  
        <pub-id pub-id-type="medline">9002492</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bates</surname>
            <given-names>DW</given-names>
          </name>
          <name name-style="western">
            <surname>Spell</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Cullen</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Burdick</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Laird</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Petersen</surname>
            <given-names>LA</given-names>
          </name>
          <name name-style="western">
            <surname>Small</surname>
            <given-names>SD</given-names>
          </name>
          <name name-style="western">
            <surname>Sweitzer</surname>
            <given-names>BJ</given-names>
          </name>
          <name name-style="western">
            <surname>Leape</surname>
            <given-names>LL</given-names>
          </name>
        </person-group>
        <article-title>The costs of adverse drug events in hospitalized patients. Adverse Drug Events Prevention Study Group</article-title>
        <source>J Am Med Assoc</source>  
        <year>1997</year>  
        <volume>277</volume>  
        <issue>4</issue>  
        <fpage>307</fpage>  
        <lpage>11</lpage>  
        <pub-id pub-id-type="medline">9002493</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nebeker</surname>
            <given-names>JR</given-names>
          </name>
          <name name-style="western">
            <surname>Hoffman</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Weir</surname>
            <given-names>CR</given-names>
          </name>
          <name name-style="western">
            <surname>Bennett</surname>
            <given-names>CL</given-names>
          </name>
          <name name-style="western">
            <surname>Hurdle</surname>
            <given-names>JF</given-names>
          </name>
        </person-group>
        <article-title>High rates of adverse drug events in a highly computerized hospital</article-title>
        <source>Arch Intern Med</source>  
        <year>2005</year>  
        <month>05</month>  
        <day>23</day>  
        <volume>165</volume>  
        <issue>10</issue>  
        <fpage>1111</fpage>  
        <lpage>6</lpage>  
        <pub-id pub-id-type="doi">10.1001/archinte.165.10.1111</pub-id>
        <pub-id pub-id-type="medline">15911723</pub-id>
        <pub-id pub-id-type="pii">165/10/1111</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Handler</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Altman</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Perera</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hanlon</surname>
            <given-names>JT</given-names>
          </name>
          <name name-style="western">
            <surname>Studenski</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Bost</surname>
            <given-names>JE</given-names>
          </name>
          <name name-style="western">
            <surname>Saul</surname>
            <given-names>MI</given-names>
          </name>
          <name name-style="western">
            <surname>Fridsma</surname>
            <given-names>DB</given-names>
          </name>
        </person-group>
        <article-title>A systematic review of the performance characteristics of clinical event monitor signals used to detect adverse drug events in the hospital setting</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2007</year>  
        <month>07</month>  
        <volume>14</volume>  
        <issue>4</issue>  
        <fpage>451</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17460130"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M2369</pub-id>
        <pub-id pub-id-type="medline">17460130</pub-id>
        <pub-id pub-id-type="pii">M2369</pub-id>
        <pub-id pub-id-type="pmcid">PMC2244905</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lazarou</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Pomeranz</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Corey</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Incidence of adverse drug reactions in hospitalized patients: a meta-analysis of prospective studies</article-title>
        <source>J Am Med Assoc</source>  
        <year>1998</year>  
        <volume>279</volume>  
        <issue>15</issue>  
        <fpage>1200</fpage>  
        <lpage>5</lpage>  
        <pub-id pub-id-type="medline">9555760</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Classen</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Pestotnik</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Evans</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Burke</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Description of a computerized adverse drug event monitor using a hospital information system</article-title>
        <source>Hosp Pharm</source>  
        <year>1992</year>  
        <volume>27</volume>  
        <issue>9</issue>  
        <fpage>783</fpage>  
        <pub-id pub-id-type="medline">10121426</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kaushal</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Jha</surname>
            <given-names>AK</given-names>
          </name>
          <name name-style="western">
            <surname>Franz</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Glaser</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Shetty</surname>
            <given-names>KD</given-names>
          </name>
          <name name-style="western">
            <surname>Jaggi</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Middleton</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Kuperman</surname>
            <given-names>GJ</given-names>
          </name>
          <name name-style="western">
            <surname>Khorasani</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Tanasijevic</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bates</surname>
            <given-names>DW</given-names>
          </name>
          <collab>Brigham and Women's Hospital CPOE Working Group</collab>
        </person-group>
        <article-title>Return on investment for a computerized physician order entry system</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2006</year>  
        <month>05</month>  
        <volume>13</volume>  
        <issue>3</issue>  
        <fpage>261</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16501178"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1984</pub-id>
        <pub-id pub-id-type="medline">16501178</pub-id>
        <pub-id pub-id-type="pii">M1984</pub-id>
        <pub-id pub-id-type="pmcid">PMC1513660</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
        <source>World Health Organization (WHO)</source>  
        <comment>Pharmacovigilance 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.who.int/medicines/areas/quality_safety/safety_efficacy/pharmvigi/en/">http://www.who.int/medicines/areas/quality_safety/safety_efficacy/pharmvigi/en/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6uhg58vQS"/></comment> </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Edlavitch</surname>
            <given-names>SA</given-names>
          </name>
        </person-group>
        <article-title>Adverse drug event reporting. Improving the low US reporting rates</article-title>
        <source>Arch Intern Med</source>  
        <year>1988</year>  
        <month>07</month>  
        <volume>148</volume>  
        <issue>7</issue>  
        <fpage>1499</fpage>  
        <lpage>503</lpage>  
        <pub-id pub-id-type="medline">3382293</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rogers</surname>
            <given-names>AS</given-names>
          </name>
          <name name-style="western">
            <surname>Israel</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>CR</given-names>
          </name>
          <name name-style="western">
            <surname>Levine</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>McBean</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Valente</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Faich</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Physician knowledge, attitudes, and behavior related to reporting adverse drug events</article-title>
        <source>Arch Intern Med</source>  
        <year>1988</year>  
        <month>07</month>  
        <volume>148</volume>  
        <issue>7</issue>  
        <fpage>1596</fpage>  
        <lpage>600</lpage>  
        <pub-id pub-id-type="medline">3382304</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Begaud</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Moride</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Tubert-Bitter</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Chaslerie</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Haramburu</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>False-positives in spontaneous reporting: should we worry about them?</article-title>
        <source>Br J Clin Pharmacol</source>  
        <year>2012</year>  
        <month>07</month>  
        <day>05</day>  
        <volume>38</volume>  
        <issue>5</issue>  
        <fpage>401</fpage>  
        <lpage>404</lpage>  
        <pub-id pub-id-type="doi">10.1111/j.1365-2125.1994.tb04373.x</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Comparing a knowledge-driven approach to a supervised machine learning approach in large-scale extraction of drug-side effect relationships from free-text biomedical literature</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2015</year>  
        <volume>16 Suppl 5</volume>  
        <fpage>S6</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-16-S5-S6"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-16-S5-S6</pub-id>
        <pub-id pub-id-type="medline">25860223</pub-id>
        <pub-id pub-id-type="pii">1471-2105-16-S5-S6</pub-id>
        <pub-id pub-id-type="pmcid">PMC4402591</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Butt</surname>
            <given-names>TF</given-names>
          </name>
          <name name-style="western">
            <surname>Cox</surname>
            <given-names>AR</given-names>
          </name>
          <name name-style="western">
            <surname>Oyebode</surname>
            <given-names>JR</given-names>
          </name>
          <name name-style="western">
            <surname>Ferner</surname>
            <given-names>RE</given-names>
          </name>
        </person-group>
        <article-title>Internet accounts of serious adverse drug reactions: a study of experiences of Stevens-Johnson syndrome and toxic epidermal necrolysis</article-title>
        <source>Drug Saf</source>  
        <year>2012</year>  
        <month>12</month>  
        <day>01</day>  
        <volume>35</volume>  
        <issue>12</issue>  
        <fpage>1159</fpage>  
        <lpage>70</lpage>  
        <pub-id pub-id-type="doi">10.2165/11631950-000000000-00000</pub-id>
        <pub-id pub-id-type="medline">23058037</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
        <source>CISION</source>  
        <year>2013</year>  
        <comment>Adverse event reporting: What pharmaceutical companies need to know 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cision.com/us/2013/12/adverse-event-reporting-pharma/">http://www.cision.com/us/2013/12/adverse-event-reporting-pharma/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6uhyRoqPe"/></comment> </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lardon</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Abdellaoui</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Bellet</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Asfari</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Souvignet</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Texier</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Jaulent</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Beyens</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Burgun</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Bousquet</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Adverse drug reaction identification and extraction in social media: a scoping review</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <month>07</month>  
        <day>10</day>  
        <volume>17</volume>  
        <issue>7</issue>  
        <fpage>e171</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2015/7/e171/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4304</pub-id>
        <pub-id pub-id-type="medline">26163365</pub-id>
        <pub-id pub-id-type="pii">v17i7e171</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526988</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Abdellaoui</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Schück</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Texier</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Burgun</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Filtering entities to optimize identification of adverse drug reaction from social media: how can the number of words between entities in the messages help?</article-title>
        <source>JMIR Public Health Surveill</source>  
        <year>2017</year>  
        <month>06</month>  
        <day>22</day>  
        <volume>3</volume>  
        <issue>2</issue>  
        <fpage>e36</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://publichealth.jmir.org/2017/2/e36/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/publichealth.6577</pub-id>
        <pub-id pub-id-type="medline">28642212</pub-id>
        <pub-id pub-id-type="pii">v3i2e36</pub-id>
        <pub-id pub-id-type="pmcid">PMC5500778</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rossi</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Knapp</surname>
            <given-names>DE</given-names>
          </name>
          <name name-style="western">
            <surname>Anello</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>O'Neill</surname>
            <given-names>RT</given-names>
          </name>
          <name name-style="western">
            <surname>Graham</surname>
            <given-names>CF</given-names>
          </name>
          <name name-style="western">
            <surname>Mendelis</surname>
            <given-names>PS</given-names>
          </name>
          <name name-style="western">
            <surname>Stanley</surname>
            <given-names>GR</given-names>
          </name>
        </person-group>
        <article-title>Discovery of adverse drug reactions</article-title>
        <source>J Am Med Assoc</source>  
        <year>1983</year>  
        <month>04</month>  
        <day>22</day>  
        <volume>249</volume>  
        <issue>16</issue>  
        <fpage>2226</fpage>  
        <pub-id pub-id-type="doi">10.1001/jama.1983.03330400072029</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gurwitz</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Field</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Harrold</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Rothschild</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Debellis</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Seger</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Cadoret</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Fish</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Garber</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Kelleher</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Incidence and preventability of adverse drug events among older persons in the ambulatory setting</article-title>
        <source>J Am Med Assoc</source>  
        <year>2003</year>  
        <volume>289</volume>  
        <issue>9</issue>  
        <fpage>1107</fpage>  
        <lpage>16</lpage>  
        <pub-id pub-id-type="medline">12622580</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
        <source>FDA</source>  
        <access-date>2018-03-04</access-date>
        <comment>Questions and Answers on FDA's Adverse Event Reporting System (FAERS) 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.fda.gov/Drugs/GuidanceComplianceRegulatoryInformation/Surveillance/AdverseDrugEffects/default.htm">http://www.fda.gov/Drugs/GuidanceComplianceRegulatoryInformation/Surveillance/AdverseDrugEffects/default.htm</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6uhyyje6x"/></comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>McGraw</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Rosati</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Evans</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>A policy framework for public health uses of electronic health data</article-title>
        <source>Pharmacoepidemiol Drug Saf</source>  
        <year>2012</year>  
        <month>01</month>  
        <volume>21</volume>  
        <issue>Suppl 1</issue>  
        <fpage>18</fpage>  
        <lpage>22</lpage>  
        <pub-id pub-id-type="doi">10.1002/pds.2319</pub-id>
        <pub-id pub-id-type="medline">22262589</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Honigman</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Rothschild</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Light</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Pulling</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Bates</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Using computerized data to identify adverse drug events in outpatients</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2001</year>  
        <volume>8</volume>  
        <issue>3</issue>  
        <fpage>254</fpage>  
        <lpage>66</lpage>  
        <pub-id pub-id-type="medline">11320070</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Brown</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Kulldorff</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Petronis</surname>
            <given-names>KR</given-names>
          </name>
          <name name-style="western">
            <surname>Reynolds</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Chan</surname>
            <given-names>KA</given-names>
          </name>
          <name name-style="western">
            <surname>Davis</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Graham</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Andrade</surname>
            <given-names>SE</given-names>
          </name>
          <name name-style="western">
            <surname>Raebel</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Herrinton</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Roblin</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Boudreau</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Gurwitz</surname>
            <given-names>JH</given-names>
          </name>
          <name name-style="western">
            <surname>Gunter</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Platt</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Early adverse drug event signal detection within population-based health networks using sequential methods: key methodologic considerations</article-title>
        <source>Pharmacoepidemiol Drug Saf</source>  
        <year>2009</year>  
        <volume>18</volume>  
        <issue>3</issue>  
        <fpage>226</fpage>  
        <lpage>34</lpage>  
        <pub-id pub-id-type="medline">19148879</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>McPeek</surname>
            <given-names>HE</given-names>
          </name>
          <name name-style="western">
            <surname>Matheny</surname>
            <given-names>ME</given-names>
          </name>
          <name name-style="western">
            <surname>Denny</surname>
            <given-names>JC</given-names>
          </name>
          <name name-style="western">
            <surname>Schildcrout</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Miller</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Comparative analysis of pharmacovigilance methods in the detection of adverse drug reactions using electronic medical records</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2013</year>  
        <month>05</month>  
        <day>01</day>  
        <volume>20</volume>  
        <issue>3</issue>  
        <fpage>420</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23161894"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001119</pub-id>
        <pub-id pub-id-type="medline">23161894</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2012-001119</pub-id>
        <pub-id pub-id-type="pmcid">PMC3628053</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hurdle</surname>
            <given-names>JF</given-names>
          </name>
          <name name-style="western">
            <surname>Weir</surname>
            <given-names>CR</given-names>
          </name>
          <name name-style="western">
            <surname>Roth</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Hoffman</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Nebeker</surname>
            <given-names>JR</given-names>
          </name>
        </person-group>
        <article-title>Critical gaps in the world's largest electronic medical record: Ad Hoc nursing narratives and invisible adverse drug events</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2003</year>  
        <fpage>309</fpage>  
        <lpage>12</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14728184"/>
        </comment>  
        <pub-id pub-id-type="medline">14728184</pub-id>
        <pub-id pub-id-type="pii">D030003423</pub-id>
        <pub-id pub-id-type="pmcid">PMC1480185</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gurulingappa</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Mateen-Rajput</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Toldo</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <source>pdfs.semanticscholar</source>  
        <access-date>2018-03-16</access-date>
        <comment>Extraction of potential adverse drug events from medical case reports 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/8352/a732f635b6071026d165cb920e6e5d0cc934.pdf">https://pdfs.semanticscholar.org/8352/a732f635b6071026d165cb920e6e5d0cc934.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xybziLU3"/></comment> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kang</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Bui</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Afzal</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>van Mulligen</surname>
            <given-names>EM</given-names>
          </name>
          <name name-style="western">
            <surname>Kors</surname>
            <given-names>JA</given-names>
          </name>
        </person-group>
        <article-title>Knowledge-based extraction of adverse drug events from biomedical text</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2014</year>  
        <month>03</month>  
        <day>04</day>  
        <volume>15</volume>  
        <fpage>64</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-64"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-15-64</pub-id>
        <pub-id pub-id-type="medline">24593054</pub-id>
        <pub-id pub-id-type="pii">1471-2105-15-64</pub-id>
        <pub-id pub-id-type="pmcid">PMC3973995</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Leaman</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Wojtulewicz</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Sullivan</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Skariah</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Towards Internet-Age Pharmacovigilance: Extracting Adverse Drug Reactions from User Posts to Health-Related Social Networks</article-title>
        <source>Proceedings of the 2010 Workshop on Biomedical Natural Language Processing</source>  
        <year>2010</year>  
        <conf-name>BioNLP '10</conf-name>
        <conf-date>July 15, 2010</conf-date>
        <conf-loc>Uppsala, Sweden</conf-loc>
        <fpage>117</fpage>  
        <lpage>125</lpage> </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Polepalli</surname>
            <given-names>RB</given-names>
          </name>
          <name name-style="western">
            <surname>Belknap</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Frid</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>West</surname>
            <given-names>DP</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Automatically recognizing medication and adverse event information from food and drug administration's adverse event reporting system narratives</article-title>
        <source>JMIR Med Inform</source>  
        <year>2014</year>  
        <month>06</month>  
        <day>27</day>  
        <volume>2</volume>  
        <issue>1</issue>  
        <fpage>e10</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://medinform.jmir.org/2014/1/e10/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/medinform.3022</pub-id>
        <pub-id pub-id-type="medline">25600332</pub-id>
        <pub-id pub-id-type="pii">v2i1e10</pub-id>
        <pub-id pub-id-type="pmcid">PMC4288072</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Visweswaran</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hanbury</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Saul</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Cooper</surname>
            <given-names>GF</given-names>
          </name>
        </person-group>
        <article-title>Detecting adverse drug events in discharge summaries using variations on the simple Bayes model</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2003</year>  
        <fpage>689</fpage>  
        <lpage>93</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14728261"/>
        </comment>  
        <pub-id pub-id-type="medline">14728261</pub-id>
        <pub-id pub-id-type="pii">D030003761</pub-id>
        <pub-id pub-id-type="pmcid">PMC1479984</pub-id></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Phansalkar</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>South</surname>
            <given-names>BR</given-names>
          </name>
          <name name-style="western">
            <surname>Hoffman</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Hurdle</surname>
            <given-names>JF</given-names>
          </name>
        </person-group>
        <article-title>Looking for a needle in the haystack? A case for detecting adverse drug events (ADE) in clinical notes</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2007</year>  
        <month>10</month>  
        <day>11</day>  
        <fpage>1077</fpage>  
        <pub-id pub-id-type="medline">18694175</pub-id></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Iqbal</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Mallah</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Jackson</surname>
            <given-names>RG</given-names>
          </name>
          <name name-style="western">
            <surname>Ball</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ibrahim</surname>
            <given-names>ZM</given-names>
          </name>
          <name name-style="western">
            <surname>Broadbent</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Dzahini</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Johnston</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Dobson</surname>
            <given-names>RJ</given-names>
          </name>
        </person-group>
        <article-title>Identification of adverse drug events from free text electronic patient records and information in a large mental health case register</article-title>
        <source>PLoS One</source>  
        <year>2015</year>  
        <month>08</month>  
        <volume>10</volume>  
        <issue>8</issue>  
        <fpage>e0134208</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0134208"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0134208</pub-id>
        <pub-id pub-id-type="medline">26273830</pub-id>
        <pub-id pub-id-type="pii">PONE-D-14-27426</pub-id>
        <pub-id pub-id-type="pmcid">PMC4537312</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Aramaki</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Miura</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Tonoike</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ohkuma</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Masuichi</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Waki</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Ohe</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Extraction of adverse drug effects from clinical records</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2010</year>  
        <volume>160</volume>  
        <fpage>739</fpage>  
        <lpage>43</lpage>  
        <pub-id pub-id-type="medline">20841784</pub-id></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Henriksson</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Kvist</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Dalianis</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Duneld</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Identifying adverse drug event information in clinical notes with distributional semantic representations of context</article-title>
        <source>J Biomed Inform</source>  
        <year>2015</year>  
        <month>08</month>  
        <day>17</day>  
        <volume>57</volume>  
        <fpage>333</fpage>  
        <lpage>49</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00180-X"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2015.08.013</pub-id>
        <pub-id pub-id-type="medline">26291578</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(15)00180-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Casillas</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Pérez</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Oronoz</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Gojenola</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Santiso</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Learning to extract adverse drug reaction events from electronic health records in Spanish</article-title>
        <source>Expert Syst Appl</source>  
        <year>2016</year>  
        <month>11</month>  
        <volume>61</volume>  
        <fpage>235</fpage>  
        <lpage>245</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.eswa.2016.05.034</pub-id></nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Jung</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Winnenburg</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>NH</given-names>
          </name>
        </person-group>
        <article-title>A method for systematic discovery of adverse drug events from clinical notes</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2015</year>  
        <month>11</month>  
        <volume>22</volume>  
        <issue>6</issue>  
        <fpage>1196</fpage>  
        <lpage>204</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26232442"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1093/jamia/ocv102</pub-id>
        <pub-id pub-id-type="medline">26232442</pub-id>
        <pub-id pub-id-type="pii">ocv102</pub-id>
        <pub-id pub-id-type="pmcid">PMC4921953</pub-id></nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>LePendu</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Iyer</surname>
            <given-names>SV</given-names>
          </name>
          <name name-style="western">
            <surname>Bauer-Mehren</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Harpaz</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Mortensen</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Podchiyska</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Ferris</surname>
            <given-names>TA</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>NH</given-names>
          </name>
        </person-group>
        <article-title>Pharmacovigilance using clinical notes</article-title>
        <source>Clin Pharmacol Ther</source>  
        <year>2013</year>  
        <month>06</month>  
        <volume>93</volume>  
        <issue>6</issue>  
        <fpage>547</fpage>  
        <lpage>55</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23571773"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1038/clpt.2013.47</pub-id>
        <pub-id pub-id-type="medline">23571773</pub-id>
        <pub-id pub-id-type="pii">clpt201347</pub-id>
        <pub-id pub-id-type="pmcid">PMC3846296</pub-id></nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Personeni</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Bresso</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Devignes</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Dumontier</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Smaïl-Tabbone</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Coulet</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Discovering associations between adverse drug events using pattern structures and ontologies</article-title>
        <source>J Biomed Semantics</source>  
        <year>2017</year>  
        <month>08</month>  
        <day>22</day>  
        <volume>8</volume>  
        <issue>1</issue>  
        <fpage>29</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0137-x"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s13326-017-0137-x</pub-id>
        <pub-id pub-id-type="medline">28830518</pub-id>
        <pub-id pub-id-type="pii">10.1186/s13326-017-0137-x</pub-id>
        <pub-id pub-id-type="pmcid">PMC5567667</pub-id></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Banda</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Evans</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Vanguri</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Tatonetti</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Ryan</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>A curated and standardized adverse drug event resource to accelerate drug safety research</article-title>
        <source>Sci Data</source>  
        <year>2016</year>  
        <month>05</month>  
        <day>10</day>  
        <volume>3</volume>  
        <fpage>160026</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27193236"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1038/sdata.2016.26</pub-id>
        <pub-id pub-id-type="medline">27193236</pub-id>
        <pub-id pub-id-type="pii">sdata201626</pub-id>
        <pub-id pub-id-type="pmcid">PMC4872271</pub-id></nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Uzuner</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Solti</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Cadag</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Extracting medication information from clinical text</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>5</issue>  
        <fpage>514</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819854"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2010.003947</pub-id>
        <pub-id pub-id-type="medline">20819854</pub-id>
        <pub-id pub-id-type="pii">17/5/514</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995677</pub-id></nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Uzuner</surname>
            <given-names>Ö</given-names>
          </name>
          <name name-style="western">
            <surname>South</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>DuVall</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>2010 i2b2/VA challenge on concepts, assertions, and relations in clinical text</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2011</year>  
        <volume>18</volume>  
        <issue>5</issue>  
        <fpage>552</fpage>  
        <lpage>6</lpage>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000203</pub-id>
        <pub-id pub-id-type="pmcid">PMC3168320</pub-id></nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jagannatha</surname>
            <given-names>AN</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Bidirectional RNN for medical event detection in electronic health records</article-title>
        <source>Proc Conf</source>  
        <year>2016</year>  
        <month>06</month>  
        <volume>2016</volume>  
        <fpage>473</fpage>  
        <lpage>482</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27885364"/>
        </comment>  
        <pub-id pub-id-type="medline">27885364</pub-id>
        <pub-id pub-id-type="pmcid">PMC5119627</pub-id></nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Collobert</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Weston</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bottou</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Karlen</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kavukcuoglu</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Kuksa</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Natural language processing (almost) from scratch</article-title>
        <source>J Mach Learn Res</source>  
        <year>2011</year>  
        <volume>12</volume>  
        <fpage>2493</fpage>  
        <lpage>2537</lpage> </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Andor</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Alberti</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Weiss</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Severyn</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Presta</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Ganchev</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Petrov</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Collins</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Globally Normalized Transition-Based Neural Networks</article-title>
        <year>2016</year>  
        <conf-name>the 54th Annual Meeting of the Association for Computational Linguistics</conf-name>
        <conf-date>August 7-12</conf-date>
        <conf-loc>Berlin, Germany</conf-loc>
        <fpage>2442</fpage>  
        <lpage>2452</lpage> </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Mou</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Peng</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Jin</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>Classifying Relations via Long Short Term Memory Networks along Shortest Dependency Path</article-title>
        <year>2015</year>  
        <conf-name>Conf Empir Methods Nat Lang Process</conf-name>
        <conf-date>September 17-21</conf-date>
        <conf-loc>Lisbon, Portugal</conf-loc>
        <fpage>1785</fpage>  
        <lpage>1794</lpage> </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Peng</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Poon</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Quirk</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Toutanova</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Yih</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <source>Cs.jhu.edu</source>  
        <year>2017</year>  
        <access-date>2018-03-17</access-date>
        <comment>Cross-Sentence N-ary Relation Extraction with Graph LSTMs 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.cs.jhu.edu/~npeng/papers/TACL_17_RelationExtraction.pdf">https://www.cs.jhu.edu/~npeng/papers/TACL_17_RelationExtraction.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xychvITD"/></comment> </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Cao</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>de Melo</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>Relation Classification via Multi-Level Attention CNNs</article-title>
        <year>2016</year>  
        <conf-name>the 54th Annual Meeting of the Association for Computational Linguistics</conf-name>
        <conf-date>August 7-12</conf-date>
        <conf-loc>Berlin, Germany</conf-loc>
        <fpage>1298</fpage>  
        <lpage>1307</lpage> </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Miwa</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bansal</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>End-to-end Relation Extraction using LSTMs on Sequences and Tree Structures</article-title>
        <year>2016</year>  
        <conf-name>Proc ACL</conf-name>
        <conf-date>August 7-12</conf-date>
        <conf-loc>Berlin, Germany</conf-loc></nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Fu</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>A neural joint model for entity and relation extraction from biomedical text</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2017</year>  
        <month>03</month>  
        <day>31</day>  
        <volume>18</volume>  
        <issue>1</issue>  
        <fpage>198</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-017-1609-9"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12859-017-1609-9</pub-id>
        <pub-id pub-id-type="medline">28359255</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12859-017-1609-9</pub-id>
        <pub-id pub-id-type="pmcid">PMC5374588</pub-id></nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lv</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Guan</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Clinical Relation Extraction with Deep Learning</article-title>
        <source>IJHIT</source>  
        <year>2016</year>  
        <month>07</month>  
        <day>31</day>  
        <volume>9</volume>  
        <issue>7</issue>  
        <fpage>237</fpage>  
        <lpage>248</lpage>  
        <pub-id pub-id-type="doi">10.14257/ijhit.2016.9.7.22</pub-id></nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sahu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Anand</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Oruganty</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Gattu</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>arxiv.org</source>  
        <year>2016</year>  
        <access-date>2018-03-17</access-date>
        <comment>Relation extraction from clinical texts using domain invariant convolutional neural network 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1606.09370.pdf">https://arxiv.org/pdf/1606.09370.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xycvyTGd"/></comment> </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Novak</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Lavrač</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Webb</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Supervised descriptive rule induction</article-title>
        <source>Encyclopedia of Machine Learning</source>  
        <year>2011</year>  
        <publisher-loc>Boston, MA</publisher-loc>
        <publisher-name>Springer</publisher-name></nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Munkhdalai</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Batsuren</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Park</surname>
            <given-names>HA</given-names>
          </name>
          <name name-style="western">
            <surname>Choi</surname>
            <given-names>NH</given-names>
          </name>
          <name name-style="western">
            <surname>Ryu</surname>
            <given-names>KH</given-names>
          </name>
        </person-group>
        <article-title>Incorporating domain knowledge in chemical and biomedical named entity recognition with word representations</article-title>
        <source>J Cheminform</source>  
        <year>2015</year>  
        <volume>7</volume>  
        <issue>Suppl 1 Text mining for chemistry and the CHEMDNER track</issue>  
        <fpage>S9</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://dx.doi.org/10.1186/1758-2946-7-S1-S9"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1758-2946-7-S1-S9</pub-id>
        <pub-id pub-id-type="medline">25810780</pub-id>
        <pub-id pub-id-type="pii">1758-2946-7-S1-S9</pub-id>
        <pub-id pub-id-type="pmcid">PMC4331699</pub-id></nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yarzebski</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ramesh</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Goldberg</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Automatically detecting acute myocardial infarction events from EHR text: a preliminary study</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2014</year>  
        <volume>2014</volume>  
        <fpage>1286</fpage>  
        <lpage>93</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25954440"/>
        </comment>  
        <pub-id pub-id-type="medline">25954440</pub-id>
        <pub-id pub-id-type="pmcid">PMC4419972</pub-id></nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Brown</surname>
            <given-names>PF</given-names>
          </name>
          <name name-style="western">
            <surname>deSouza</surname>
            <given-names>PV</given-names>
          </name>
          <name name-style="western">
            <surname>Mercer</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Della Pietra</surname>
            <given-names>VJ</given-names>
          </name>
          <name name-style="western">
            <surname>Lai</surname>
            <given-names>JC</given-names>
          </name>
        </person-group>
        <article-title>Class-based n-gram models of natural language</article-title>
        <source>Comput Linguist</source>  
        <year>1992</year>  
        <volume>18</volume>  
        <issue>4</issue>  
        <fpage>467</fpage>  
        <lpage>479</lpage> </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>arXiv.org</source>  
        <year>2013</year>  
        <access-date>2018-03-17</access-date>
        <comment>Efficient estimation of word representations in vector space 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1301.3781.pdf">https://arxiv.org/pdf/1301.3781.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xydGYKsS"/></comment> </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Glasmachers</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <source>proceedings.mlr.press</source>  
        <year>2017</year>  
        <access-date>2018-03-17</access-date>
        <comment>Limits of End-to-End Learning 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://proceedings.mlr.press/v77/glasmachers17a/glasmachers17a.pdf">http://proceedings.mlr.press/v77/glasmachers17a/glasmachers17a.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xydP34zO"/></comment> </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Relation Classification: CNN or RNN?</article-title>
        <source>Natural Language Understanding and Intelligent Applications</source>  
        <year>2016</year>  
        <publisher-loc>Cham</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>665</fpage>  
        <lpage>675</lpage> </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <source>arXiv.org</source>  
        <year>2015</year>  
        <access-date>2018-03-17</access-date>
        <comment>Relation Classification via Recurrent Neural Network 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1508.01006.pdf">https://arxiv.org/pdf/1508.01006.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xydY8IOW"/></comment> </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Simard</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Frasconi</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Learning long-term dependencies with gradient descent is difficult</article-title>
        <source>IEEE Trans Neural Netw</source>  
        <year>1994</year>  
        <volume>5</volume>  
        <issue>2</issue>  
        <fpage>157</fpage>  
        <lpage>66</lpage>  
        <pub-id pub-id-type="doi">10.1109/72.279181</pub-id></nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hochreiter</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>The vanishing gradient problem during learning recurrent neural nets and problem solutions</article-title>
        <source>Int J Uncertain Fuzziness Knowl-Based Syst</source>  
        <year>1998</year>  
        <volume>6</volume>  
        <issue>2</issue>  
        <fpage>107</fpage>  
        <pub-id pub-id-type="doi">10.1142/S0218488598000094</pub-id></nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sutskever</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Vinyals</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Le</surname>
            <given-names>QV</given-names>
          </name>
        </person-group>
        <article-title>Sequence to Sequence Learning with Neural Networks</article-title>
        <year>2014</year>  
        <conf-name>NIPS</conf-name>
        <conf-date>December 8-13</conf-date>
        <conf-loc>Montreal, Canada</conf-loc>
        <fpage>3104</fpage>  
        <lpage>3112</lpage> </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Graves</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Mohamed</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hinton</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Speech Recognition with Deep Recurrent Neural Networks</article-title>
        <year>2013</year>  
        <conf-name>IEEE ICASSP</conf-name>
        <conf-date>May 26-31</conf-date>
        <conf-loc>Vancouver, BC, Canada</conf-loc>
        <fpage>6645</fpage>  
        <lpage>6649</lpage>  
        <pub-id pub-id-type="doi">10.1109/ICASSP.2013.6638947</pub-id></nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bowman</surname>
            <given-names>SR</given-names>
          </name>
          <name name-style="western">
            <surname>Angeli</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Potts</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Manning</surname>
            <given-names>CD</given-names>
          </name>
        </person-group>
        <article-title>A large annotated corpus for learning natural language inference</article-title>
        <source>Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing</source>  
        <year>2015</year>  
        <conf-name>EMNLP</conf-name>
        <conf-date>September 17-21</conf-date>
        <conf-loc>Lisbon, Portugal</conf-loc></nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bahdanau</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Cho</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <source>arXiv.org</source>  
        <year>2015</year>  
        <access-date>2018-03-17</access-date>
        <comment>Neural Machine Translation by Jointly Learning to Align and Translate 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1409.0473.pdf">https://arxiv.org/pdf/1409.0473.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xydutE0m"/></comment> </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hermann</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Kočiský</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Grefenstette</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Espeholt</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Kay</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Suleyman</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Blunsom</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <source>arXiv.org</source>  
        <year>2015</year>  
        <access-date>2018-03-17</access-date>
        <comment>Teaching Machines to Read and Comprehend 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1506.03340.pdf">https://arxiv.org/pdf/1506.03340.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xye7stpJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rocktäschel</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Grefenstette</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Hermann</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Kočiský</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Blunsom</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <source>arXiv.org</source>  
        <year>2015</year>  
        <access-date>2018-03-17</access-date>
        <comment>Reasoning about Entailment with Neural Attention 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1509.06664.pdf">https://arxiv.org/pdf/1509.06664.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6xyeAy6dt"/></comment> </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Vinyals</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Kaiser</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Koo</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Petrov</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sutskever</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Hinton</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Grammar as a Foreign Language</article-title>
        <year>2015</year>  
        <conf-name>NIPS</conf-name>
        <conf-date>Dec 7-12</conf-date>
        <conf-loc>Montreal, Canada</conf-loc></nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kingma</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ba</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Adam: a Method for Stochastic Optimization</article-title>
        <year>2015</year>  
        <conf-name>Int Conf Learn Represent</conf-name>
        <conf-date>May 7-9</conf-date>
        <conf-loc>San Diego, CA, USA</conf-loc>
        <fpage>1</fpage>  
        <lpage>13</lpage> </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Hinton</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Krizhevsky</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Sutskever</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Salakhutdinov</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Dropout: a simple way to prevent neural networks from overfitting</article-title>
        <source>J Mach Learn Res</source>  
        <year>2014</year>  
        <volume>15</volume>  
        <issue>1</issue>  
        <fpage>1929</fpage>  
        <lpage>1958</lpage> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
