<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v3i2e24</article-id>
    <article-id pub-id-type="pmid">28468748</article-id>
    <article-id pub-id-type="doi">10.2196/publichealth.6396</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>TwiMed: Twitter and PubMed Comparable Corpus of Drugs, Diseases, Symptoms, and Their Relations</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Dunn</surname>
          <given-names>Adam</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Lafrado</surname>
          <given-names>Louis</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Kang</surname>
          <given-names>Jaewoo</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Tan</surname>
          <given-names>Aik Choon</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Segura Bedmar</surname>
          <given-names>Isabel</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Alvaro</surname>
        <given-names>Nestor</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>National Institute of Informatics</institution>
        <institution>Department of Informatics</institution>
        <addr-line>2-1-2 Hitotsubashi, Chiyoda-ku</addr-line>
        <addr-line>Tokyo, 101-8430</addr-line>
        <country>Japan</country>
        <phone>81 3 4212 2164</phone>
        <fax>81 3 4212 2150</fax>
        <email>nestoralvaro@gmail.com</email>
      </address>  
      <xref rid="aff2" ref-type="aff">2</xref>
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1543-7208</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Miyao</surname>
          <given-names>Yusuke</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-0678-3400</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Collier</surname>
          <given-names>Nigel</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7230-4164</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>National Institute of Informatics</institution>
    <institution>Department of Informatics</institution>  
    <addr-line>Tokyo</addr-line>
    <country>Japan</country></aff>
    <aff id="aff2">
      <sup>2</sup>
      <institution>The Graduate University for Advanced Studies (SOKENDAI)</institution>
      <addr-line>Kanagawa</addr-line>
      <country>Japan</country>
    </aff>
    <aff id="aff3">
    <sup>3</sup>
    <institution>Faculty of Modern &#38; Medieval Languages</institution>
    <institution>Department of Theoretical and Applied Linguistics</institution>  
    <institution>University of Cambridge</institution>  
    <addr-line>Cambridge</addr-line>
    <country>United Kingdom</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Nestor Alvaro 
      <email>nestoralvaro@gmail.com</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Apr-Jun</season><year>2017</year></pub-date>
    <pub-date pub-type="epub">
      <day>03</day>
      <month>05</month>
      <year>2017</year>
    </pub-date>
    <volume>3</volume>
    <issue>2</issue>
    <elocation-id>e24</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>28</day>
        <month>7</month>
        <year>2016</year>
      </date>
      <date date-type="rev-request">
        <day>22</day>
        <month>8</month>
        <year>2016</year>
      </date>
      <date date-type="rev-recd">
        <day>24</day>
        <month>11</month>
        <year>2016</year>
      </date>
      <date date-type="accepted">
        <day>20</day>
        <month>3</month>
        <year>2017</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Nestor Alvaro, Yusuke Miyao, Nigel Collier. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 03.05.2017.</copyright-statement>
    <copyright-year>2017</copyright-year>
    <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://publichealth.jmir.org/2017/2/e24/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Work on pharmacovigilance systems using texts from PubMed and Twitter typically targets different elements and uses different annotation guidelines, resulting in a scenario where there is no comparable set of documents from both Twitter and PubMed annotated in the same manner.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>This study aimed to provide a comparable corpus of texts from PubMed and Twitter that can be used to study drug reports from these two sources of information, allowing researchers in the area of pharmacovigilance using natural language processing (NLP) to perform experiments to better understand the similarities and differences between drug reports in Twitter and PubMed.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We produced a corpus comprising 1000 tweets and 1000 PubMed sentences selected using the same strategy and annotated at entity level by the same experts (pharmacists) using the same set of guidelines.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>The resulting corpus, annotated by two pharmacists, comprises semantically correct annotations for a set of drugs, diseases, and symptoms. This corpus contains the annotations for 3144 entities, 2749 relations, and 5003 attributes.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>We present a corpus that is unique in its characteristics as this is the first corpus for pharmacovigilance curated from Twitter messages and PubMed sentences using the same data selection and annotation strategies. We believe this corpus will be of particular interest for researchers willing to compare results from pharmacovigilance systems (eg, classifiers and named entity recognition systems) when using data from Twitter and from PubMed. We hope that given the comprehensive set of drug names and the annotated entities and relations, this corpus becomes a standard resource to compare results from different pharmacovigilance studies in the area of NLP.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>Twitter</kwd>
      <kwd>PubMed</kwd>
      <kwd>corpus</kwd>
      <kwd>pharmacovigilance</kwd>
      <kwd>natural language processing</kwd>
      <kwd>text mining</kwd>
      <kwd>annotation</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Corpora annotated for adverse drug events are becoming important in order to train computers to automatically build adverse drug reaction profiles for post marketing surveillance.</p>
      <p>Researchers are typically interested in understanding the accuracy of their systems [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref8">8</xref>], whereas at the same time only a limited number of corpora exist [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      <p>Pharmacovigilance (drug safety) systems using texts obtained from the scientific literature have received attention for many years [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>], and more recently researchers have started exploring Twitter and other nonscientific texts where patients describe diseases and symptoms [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. However, there is currently no way to systematically compare system performance across text types.</p>
      <p>In this paper we provide a benchmark corpus composed of semantically correct annotations that can be used in natural language processing (NLP) studies and show our approach to produce a comparable corpus using texts from Twitter and PubMed, explaining our strategy for controlling external variables that may affect the sample.</p>
      <p>Social media texts are known for containing a high proportion of ungrammatical constructions, out-of-vocabulary words, abbreviations, and metaphoric usage [<xref ref-type="bibr" rid="ref13">13</xref>], whereas scientific texts are known for the use of specialized vocabulary and well-formed sentences. Secondary key factors involved in a direct comparison are the data selection methods and the topicality [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>Existing corpora from PubMed and Twitter cannot be directly compared and the goal of this research is to produce a comparable corpus of drug-related sentences targeting the same set of drugs.</p>
      <p>To date, most of the curated corpora for pharmacovigilance come from scientific formal texts obtained from PubMed [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], although datasets curated from other scientific resources, such as the Khresmoi project [<xref ref-type="bibr" rid="ref17">17</xref>], are also available [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>In recent years, corpora obtained from social media texts have started emerging. At first, researchers focused on blogs and forums [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>] and then on Twitter data [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>] due to the high volume of the information it provides, with 310 million monthly active users [<xref ref-type="bibr" rid="ref23">23</xref>] generating over 500 million tweets per day [<xref ref-type="bibr" rid="ref24">24</xref>] and also motivated by its “realtime” information, allowing health researchers to potentially investigate and identify new adverse drug event (ADE) types faster than traditional methods such as physician reports.</p>
      <p>Researchers have assessed the number of scientific works in PubMed where Twitter data was used [<xref ref-type="bibr" rid="ref25">25</xref>], finding that the interest in Twitter is growing within the medical domain. Pharmacovigilance sees Twitter as a useful resource in different areas. Messages posted on Twitter can help researchers to understand temporal patterns in drug usage [<xref ref-type="bibr" rid="ref26">26</xref>], can provide a good resource for obtaining first-hand experience reports on drug use [<xref ref-type="bibr" rid="ref21">21</xref>], and can be useful in the early detection of prescription medication abuse [<xref ref-type="bibr" rid="ref27">27</xref>] and adverse events [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      <p>Recent examples where researchers used Twitter to build different corpora are: (1) the corpus built to understand patient experiences at health care facilities [<xref ref-type="bibr" rid="ref29">29</xref>], (2) the corpus built to measure the public interest and concerns about different diseases [<xref ref-type="bibr" rid="ref30">30</xref>], and (3) the corpus used to assess the positive or negative attitude toward specific treatments [<xref ref-type="bibr" rid="ref31">31</xref>]. Although texts written in English have been used very frequently in NLP for pharmacovigilance, texts in Spanish extracted from social media [<xref ref-type="bibr" rid="ref32">32</xref>] and French clinical texts [<xref ref-type="bibr" rid="ref33">33</xref>] have also been used.</p>
      <p>The work on Twitter and PubMed is an ongoing effort resulting in promising NLP studies on the automatic recognition of medications and adverse events [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] and also on medical question answering [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. By releasing our corpus, we hope other researchers can benefit from it and continue advancing in this area.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Selection</title>
        <p>For our study, we selected a set of 30 different drugs used in other pharmacovigilance studies [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Including these drugs allowed us to cover different interests in the research community and also allowed us to account for drugs used to treat very different conditions such as cancer [<xref ref-type="bibr" rid="ref3">3</xref>], attention deficit disorders [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], schizophrenia [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], or depression [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p>
        <p>We employed Twitter’s application programming interface (API) to download messages mentioning any of those drug names or their synonyms by running our script from September 7, 2015 to October 10, 2015, obtaining 165,489 tweets. In the case of PubMed, we obtained the list of articles about those drugs by using EuropePMC RESTful Web Services [<xref ref-type="bibr" rid="ref40">40</xref>], issuing our query on October 21, 2015 to search for texts containing the same keywords that we used when collecting tweets. Once we had the list of PubMed articles, we processed them to extract the sentences containing the drug mentions obtaining 29,435 sentences.</p>
        <p>From these sentences, we removed all non-ASCII (American standard code for information interchange) characters (eg, emojis), replaced all user name mentions with “__username__,” all email addresses with “__email__,” and all numbers with “__number__.” We also reduced character elongation by removing the repetition of a character after the second occurrence (eg, “greeeeeeat” would become “greeat”), and lowercased all sentences.</p>
        <p>Using the preprocessed sentences and aiming at maximizing the informativeness and the variability of the texts, we limited the number of tweets any user could contribute to 5 and discarded sentences shorter than 20 characters in length, retweets, tweets not written in English, sentences containing keywords related to marketing campaigns (for this we created a list built heuristically using 5 words commonly related to marketing campaigns: “buy,” “cheap,” “online,” “pharmacy,” “price”), and also discarded sentences including URLs.</p>
        <p>To discard possibly duplicated sentences, we stored 40-character long substrings appearing in the chosen sentences and searched for these substrings in the candidate sentences keeping only the messages not containing them. For each chosen sentence, we only stored one substring composed of a maximum of 40 characters (less for sentences shorter than 60 characters in length), extracted from the character in position 20 onwards. This decision was driven by the observation that there were a number of tweets conveying the same information using minor rewording for the sentences, making them unique. In this scenario, discarding the sentences replicating information contributes to increasing the information diversity.</p>
        <p>This strategy aims at further increasing the variability of the texts by filtering out similar messages, and in case of selecting the message “Lisinopril is used for treating high blood pressure alone or with other medicines. Other names for this medication. Acecomb, Acelisino” by extracting the 40 characters starting at position 20 (“or treating high blood pressure alone or”), we are able to discard possible duplicated sentences such as “lisinopril and hctz 20 mg 25 mg—national institutes of...lisinopril is used for treating high blood pressure alone or with other,” and similarly the system is also able to discard the sentence “Jun 29, 2015...Active Ingredient: Lisinopril. Prinivil is used for treating high blood pressure alone or with other medicines. Other.” This strategy also showed its usefulness when applied to PubMed sentences as observed in the substring “mg oral granules are bioequivalent to s” appearing twice in the same article, first in the abstract (“Sandoz montelukast 4 mg oral granules are bioequivalent to Singulair 4 mg mini oral granules, with a similar safety profile.”), and also in the discussion (“The current study has clearly demonstrated that Sandoz montelukast 4 mg oral granules are bioequivalent to Singulair mini 4 mg oral granules in terms of the rate and extent of absorption of each formulation.”), thereby showing that this method can help in reducing the amount of duplicated information.</p>
        <p>Out of the resulting sentences, we automatically selected 6000 sentences each for both Twitter and PubMed, which we extracted in a round-robin fashion aiming at a balanced sample of the drug mentions.</p>
        <p>We were interested in finding which sentences would be of interest, for which we divided the main task into two phases. During the first phase, both annotators were requested to perform a sentence level annotation to extract 1000 positive sentences (ie, the sentences mentioning drugs, symptoms, and diseases related to the drug effects in humans) out of the 6000 sentences. In the second phase, the annotators would use the annotation guidelines to identify the entities and relations appearing in the 1000 sentences identified during the first phase.</p>
        <p>The aim of this pipeline is to filter the most informative sentences, discarding those sentences that are prone to include information that is not of high relevance for pharmacovigilance studies.</p>
        <p><xref ref-type="fig" rid="figure1">Figure 1</xref> shows the pipeline used to filter, classify, and annotate the sentences. Despite the difference in the initial number of raw sentences we had from Twitter and PubMed (165,489 tweets and 29,435 PubMed sentences), the steps described in the figure provided the same number of sentences at the end of each process.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Annotation pipeline. The initial number of raw sentences differed between Twitter (165,489 tweets) and PubMed (29,435 sentences).</p>
          </caption>
          <graphic xlink:href="jmir_v3i2e24_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Selecting the Annotators</title>
        <p>We identified 6 people who were willing to contribute to the task and prepared a test to identify the best candidates. We provided them with 20 sentences from Twitter and 20 sentences from PubMed obtained from the 6000 sentences we had previously filtered, and their task was to identify the sentences containing a mention of a drug and a related disease or symptom. The 6 candidate annotators had different backgrounds: one of them was a native English speaker, three of them were pharmacists, and the last two of them were active social media users. Except for the native English speaker, the rest of the annotators were native Spanish speakers able to read English texts. Although the pharmacists are not referred to as “active social media users,” the three of them were millennials who used social media networks to some extent.</p>
      </sec>
      <sec>
        <title>Annotation</title>
        <p>The annotation guidelines were prepared after reviewing existing guidelines used in other pharmacovigilance projects. In the ADE corpus guidelines [<xref ref-type="bibr" rid="ref10">10</xref>], the researchers annotated the drugs, adverse effects, dosages, and the existing relations between these elements, whereas in the meta-knowledge annotation of bio-events [<xref ref-type="bibr" rid="ref11">11</xref>], the researchers followed a slightly different approach and focused on different “dimensions” of the biomedical events. Those dimensions can be thought of as attributes of those events as these dimensions are the knowledge type, the level of certainty, the polarity, the manner, and the source of the annotated event.</p>
        <p>Similar to the annotation of the ADE corpus, the Arizona disease corpus (AZDC) annotation guidelines [<xref ref-type="bibr" rid="ref41">41</xref>] focused on the annotation of the diseases, also covering syndromes, illnesses, and disorders. Another document consulted to prepare the first draft of our annotation guidelines was the shared annotated resources (ShARe) or Conference and Labs of the Evaluation Forum (CLEF) eHealth 2013 shared task I [<xref ref-type="bibr" rid="ref12">12</xref>], where the authors annotated disorders using the concept unique identifier (CUI), also clarifying that a disorder is understood as “any span of text that can be mapped to a concept in the SNOMED-CT terminology, which belongs to the Disorder semantic group,” clarifying that the Disorder semantic group should include “congenital abnormalities,” “diseases or syndromes,” and “signs and symptoms” among others.</p>
        <p>Another supporting document used to prepare the annotation guidelines was the annotation guidelines for the drug-drug interaction (DDI) corpus [<xref ref-type="bibr" rid="ref9">9</xref>], where the authors focused on the annotation of a number of entities such as drugs approved for human use, brand names for approved drugs, drugs that have not been approved for human use, and different drug groups. These guidelines also describe the annotation for different types of relations existing between the entities: “advice,” “effect,” “mechanism,” or “other.”</p>
        <p>The details on the resulting corpora produced by the researchers using the aforementioned annotation guidelines can be found in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Details on the resulting corpora produced by the researchers who used the guidelines we reviewed.</p>
          </caption>
          <table width="500" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="85"/>
            <col width="110"/>
            <col width="120"/>
            <col width="110"/>
            <col width="75"/>
            <thead>
              <tr valign="top">
                <td>Corpus name</td>
                <td>DDI<sup>a</sup> corpus</td>
                <td>ADE<sup>b</sup> corpus</td>
                <td>AZDC<sup>c</sup> corpus</td>
                <td>ShARe<sup>d</sup> or CLEF<sup>e</sup> eHealth 2013 Task I</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Annotated entities</td>
                <td>Pharmacological substances</td>
                <td>Drug, adverse effects, dosages</td>
                <td>Diseases</td>
                <td>Disorders</td>
              </tr>
              <tr valign="top">
                <td>Annotated relations</td>
                <td>Drug-drug interactions</td>
                <td>Drug-adverse effect, drug-dosage</td>
                <td>-</td>
                <td>-</td>
              </tr>
              <tr valign="top">
                <td>Texts origin</td>
                <td>DrugBank and MEDLINE</td>
                <td>MEDLINE</td>
                <td>PubMed abstracts</td>
                <td>Clinical notes</td>
              </tr>
              <tr valign="top">
                <td>Number of documents</td>
                <td>1025</td>
                <td>2972</td>
                <td>794 (2775 sentences)</td>
                <td>200</td>
              </tr>
              <tr valign="top">
                <td>Number of annotators</td>
                <td>2</td>
                <td>3 (after automatic annotation)</td>
                <td>2 (after automatic annotation)</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Availability</td>
                <td>Free use for academic research</td>
                <td>Free</td>
                <td>Free</td>
                <td>Upon request</td>
              </tr>
              <tr valign="top">
                <td>Annotation Tool</td>
                <td>Brat</td>
                <td>Knowtator</td>
                <td>In-house tool</td>
                <td>Knowtator</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>DDI: drug-drug interaction.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>ADE: adverse drug event.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>AZDC: Arizona disease corpus.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>ShARe: shared annotated resources.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>CLEF: Conference and Labs of the Evaluation Forum.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As shown in <xref ref-type="table" rid="table1">Table 1</xref>, for the annotation of these corpora, researchers have used tools such as Brat [<xref ref-type="bibr" rid="ref42">42</xref>] and Knowtator [<xref ref-type="bibr" rid="ref43">43</xref>]. In our case, the Brat tool was chosen after taking into account that it is a Web-based annotation tool that eases key elements of the annotation process.</p>
        <p>The use of the mentioned guidelines eased the task of generating the first draft of our guidelines, and allowed us to identify which were the entities, relations, and attributes to be annotated. This first draft was then used by three external annotators with a background in computer science who annotated a small set of PubMed and Twitter sentences. During that first annotation period, we had daily meetings after each annotation session and refined the guidelines upon the discrepancies we found and the questions raised by the annotators. Those comments and questions, in combination with the information we found in the existing guidelines, were used to provide the annotators with an updated version of the guidelines for the next annotation session.</p>
        <p>After 2 weeks and 6 annotation sessions, the number of discrepancies was reduced to a minimum and no more questions were raised, leading us to agree on freezing the guidelines so that these would be used as they were.</p>
        <p>The final version of the guidelines used in our study includes three different entities: (1) drug: any of the marketed medicines that appears in the SIDER database [<xref ref-type="bibr" rid="ref44">44</xref>], which is also listed in the closed set of drugs we provided to the annotators, (2) symptom: any sign or symptom contained in the MedDRA [<xref ref-type="bibr" rid="ref45">45</xref>] ontology, and (3) disease: any disease contained in the MedDRA ontology.</p>
        <p>The use of SIDER, which contains information on marketed medicines extracted from public documents and package inserts, and MedDRA, a medical terminology dictionary, was aimed at easing the annotators’ task by providing them with two well-known resources to provide the annotated entities with a standardized concept identifier. We believe the fact that those resources are of common use in the research community, combined with the current trend to map concepts in these databases to concepts in other resources [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], provides an important element toward TwiMed corpus reuse.</p>
        <list list-type="bullet">
          <list-item>
            <p>Polarity: Used to indicate whether the entity was negated or not. The negation had to be a linguistic negation (“not,” “don’t”...).</p>
          </list-item>
          <list-item>
            <p>Person: Used to indicate whether the entity was affecting the “1st,” “2nd,” “3rd” person, or whether there was no information. This attribute was based on the original sender.</p>
          </list-item>
          <list-item>
            <p>Modality: Used to indicate whether the entity was stated in an “actual,” “hedged,” “hypothetical,” or “generic” way.</p>
          </list-item>
          <list-item>
            <p>Exemplification: Used to indicate whether the entity was presented using an example or a description. This attribute was only to be used when the entity was presented through an exemplification.</p>
          </list-item>
          <list-item>
            <p>Duration: Used to indicate whether the entity’s lasting span was “intermittent,” “regular,” “irregular,” or not stated. In the case of drugs, this attribute referred to the time span when the drug had been taken.</p>
          </list-item>
          <list-item>
            <p>Severity: Used to indicate whether the seriousness of an entity was “mild,” “severe,” or not stated. This was the only attribute that did not apply to drugs.</p>
          </list-item>
          <list-item>
            <p>Status: Used to indicate whether the duration of the entity was “complete,” “continuing,” or not stated. In the case of drugs, this attribute referred to the time span when the drug was perceived as having effect.</p>
          </list-item>
          <list-item>
            <p>Sentiment: Used to indicate whether the entity was perceived as “positive,” “negative,” or “neutral.”</p>
          </list-item>
          <list-item>
            <p>Entity identifier: Used to indicate the CUI for that entity. This was the only attribute that had to be filled for all annotated entities. For this attribute we provided a list of allowed values, and used the value “−1” (not found) for entities whose CUI would not be present in the list.</p>
          </list-item>
        </list>
        <p>The list of attributes was decided based on the combination of elements noticed to be annotated in pharmacovigilance studies using formal texts (eg, “duration” or “modality”), as well as in pharmacovigilance studies using informal texts (eg, “polarity” or “sentiment”).</p>
        <list list-type="bullet">
          <list-item>
            <p>Reason-to-use: Used to represent the relation appearing when a symptom or disease leads to the use of some drug.</p>
          </list-item>
          <list-item>
            <p>Outcome-positive: Used to represent the relation between a drug, and an expected or unexpected symptom or disease appearing after the drug consumption. The outcome had to be positive.</p>
          </list-item>
          <list-item>
            <p>Outcome-negative: Used to represent the relation between a drug, and an expected or unexpected symptom or disease appearing after the drug consumption. The outcome had to be negative.</p>
          </list-item>
        </list>
        <p>These elements are further explained in the annotation guidelines, which are shared in <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>Once the guidelines were ready and the annotators were chosen, we preprocessed the sentences before presenting them to the annotators by replacing the existing emojis with a string describing each character and discarding other non-ASCII characters. We also decided not to lowercase the sentences, as we thought that preserving the original case would make it easier for the annotators to detect some sentiments and disambiguate acronyms. Besides these changes, the preprocessing strategy is the same as the one we described in the “data selection” section.</p>
        <p>To compare the annotations produced by the experts, we focused on both the “type” assigned to the entity (ie, disease, drug, or symptom) and also on the offsets for that entity. Taking that into account, we decided to compute the results when using relaxed constraints and strict constraints. In the case of using relaxed constraints, we say that the entity annotated by both annotators is a match if the type for the entity matches between annotations and the spans of those annotations have some overlap. In the case of using strict constraints, the match would happen if the type in both annotations matches and the spans for the annotated entities have the same offsets. Discontinuous annotations were allowed and taken into account when computing the matches, which means that in case of using strict constraints, all the spans taking part in the entity’s annotation should be the same.</p>
        <p>We measured the level of agreement between the annotations produced by our experts following the inter annotator agreement (IAA) measure in the CLEF corpus [<xref ref-type="bibr" rid="ref48">48</xref>]. This IAA metric is reported to approximate the kappa score [<xref ref-type="bibr" rid="ref48">48</xref>], and to be more suited for this case [<xref ref-type="bibr" rid="ref49">49</xref>]:</p>
        <p>IAA=matches/(matches+nonmatches)</p>
        <p>In our case matches accounts for the total number of token matches for which both annotators agreed, and matches + nonmatches counts all annotations performed by the annotator being evaluated.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Selection</title>
        <p>Among the 6000 sentences that we extracted from each of Twitter and PubMed, we observed differing sample frequencies for each drug. In both Twitter and PubMed, some drugs attracted more attention than others, although in the case of Twitter, temporal variability is a known fact [<xref ref-type="bibr" rid="ref26">26</xref>] that has to be taken into consideration.</p>
        <p>We found that the frequency of the drugs in the extracted sample had no correlation between Twitter and PubMed (Spearman rho=.03), as shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Total number of sentences for each drug name in Twitter and PubMed.</p>
          </caption>
          <table width="350" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="130"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Drug name</td>
                <td># Tweets</td>
                <td># Sentences in PubMed</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Bevacizumab</td>
                <td>69</td>
                <td>239</td>
              </tr>
              <tr valign="top">
                <td>Buprenorphine</td>
                <td>363</td>
                <td>244</td>
              </tr>
              <tr valign="top">
                <td>Carbamazepine</td>
                <td>74</td>
                <td>239</td>
              </tr>
              <tr valign="top">
                <td>Ciprofloxacin</td>
                <td>81</td>
                <td>250</td>
              </tr>
              <tr valign="top">
                <td>Citalopram</td>
                <td>331</td>
                <td>251</td>
              </tr>
              <tr valign="top">
                <td>Cortisone</td>
                <td>344</td>
                <td>231</td>
              </tr>
              <tr valign="top">
                <td>Dextroamphetamine sulphate</td>
                <td>373</td>
                <td>19</td>
              </tr>
              <tr valign="top">
                <td>Docetaxel</td>
                <td>34</td>
                <td>246</td>
              </tr>
              <tr valign="top">
                <td>Duloxetine</td>
                <td>242</td>
                <td>241</td>
              </tr>
              <tr valign="top">
                <td>Fluoxetine</td>
                <td>344</td>
                <td>238</td>
              </tr>
              <tr valign="top">
                <td>Fluvoxamine maleate</td>
                <td>13</td>
                <td>204</td>
              </tr>
              <tr valign="top">
                <td>Lamotrigine</td>
                <td>168</td>
                <td>242</td>
              </tr>
              <tr valign="top">
                <td>Lisdexamfetamine</td>
                <td>348</td>
                <td>84</td>
              </tr>
              <tr valign="top">
                <td>Lisinopril</td>
                <td>56</td>
                <td>147</td>
              </tr>
              <tr valign="top">
                <td>Melphalan</td>
                <td>2</td>
                <td>234</td>
              </tr>
              <tr valign="top">
                <td>Methylphenidate hydrochloride</td>
                <td>349</td>
                <td>112</td>
              </tr>
              <tr valign="top">
                <td>Modafinil</td>
                <td>287</td>
                <td>10</td>
              </tr>
              <tr valign="top">
                <td>Montelukast</td>
                <td>71</td>
                <td>239</td>
              </tr>
              <tr valign="top">
                <td>Olanzapine</td>
                <td>190</td>
                <td>248</td>
              </tr>
              <tr valign="top">
                <td>Paroxetine</td>
                <td>365</td>
                <td>249</td>
              </tr>
              <tr valign="top">
                <td>Prednisone</td>
                <td>350</td>
                <td>249</td>
              </tr>
              <tr valign="top">
                <td>Quetiapine</td>
                <td>339</td>
                <td>247</td>
              </tr>
              <tr valign="top">
                <td>Rupatadine</td>
                <td>1</td>
                <td>45</td>
              </tr>
              <tr valign="top">
                <td>Sertraline</td>
                <td>343</td>
                <td>236</td>
              </tr>
              <tr valign="top">
                <td>Tamoxifen</td>
                <td>122</td>
                <td>238</td>
              </tr>
              <tr valign="top">
                <td>Topiramate</td>
                <td>133</td>
                <td>231</td>
              </tr>
              <tr valign="top">
                <td>Trazodone</td>
                <td>206</td>
                <td>70</td>
              </tr>
              <tr valign="top">
                <td>Triamcinolone acetonide</td>
                <td>14</td>
                <td>253</td>
              </tr>
              <tr valign="top">
                <td>Venlafaxine</td>
                <td>326</td>
                <td>238</td>
              </tr>
              <tr valign="top">
                <td>Ziprasidone</td>
                <td>62</td>
                <td>226</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Selecting the Annotators</title>
        <p>To evaluate the annotators’ performance, we used a gold standard set of labels that we generated by taking the majority vote over the results we received from the 6 annotators and the annotations produced by the first author of the paper, while giving more weight to the pharmacists’ annotations in PubMed and to social media users’ annotations in Twitter. That is, when there were clear differences between the annotations provided by the contributors with the higher weights and the rest of the annotators, we took the former annotations into account.</p>
        <p>As can be seen in <xref ref-type="table" rid="table3">Table 3</xref>, one pharmacist scored the best result, 87.5% agreement with the gold standard data (35 out of 40 sentences were correctly labelled).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Agreement with gold standard data during the annotator selection phase. We compared the results from 2 very active social media users, one native English speaker and 3 pharmacists. We indicate between brackets the time it took to complete the annotation for that dataset (time in min).</p>
          </caption>
          <table width="500" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="133"/>
            <col width="133"/>
            <col width="133"/>
            <col width="101"/>
            <thead>
              <tr valign="top">
                <td>Annotator</td>
                <td>Twitter (min)</td>
                <td>PubMed (min)</td>
                <td>Total (min)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Social1</td>
                <td>0.70 (9)</td>
                <td>0.80 (10)</td>
                <td>0.75 (19)</td>
              </tr>
              <tr valign="top">
                <td>Social2</td>
                <td>1.00 (8)</td>
                <td>0.70 (7)</td>
                <td>0.85 (15)</td>
              </tr>
              <tr valign="top">
                <td>Native speaker</td>
                <td>0.85 (6)</td>
                <td>0.50 (6)</td>
                <td>0.67 (12)</td>
              </tr>
              <tr valign="top">
                <td>Pharmacist1</td>
                <td>0.90 (8)</td>
                <td>0.85 (7)</td>
                <td>0.87 (15)</td>
              </tr>
              <tr valign="top">
                <td>Pharmacist2</td>
                <td>0.70 (11)</td>
                <td>0.80 (9)</td>
                <td>0.75 (20)</td>
              </tr>
              <tr valign="top">
                <td>Pharmacist3</td>
                <td>0.50 (15)</td>
                <td>0.70 (15)</td>
                <td>0.60 (30)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Those results were in line with our expectations as social media users got the best scores in social media texts, and the best scores in PubMed texts were obtained by the pharmacists. However, we were very surprised by the low scores obtained by Pharmacist3 and the native English speaker. We followed up with them and discovered that Pharmacist3 had some trouble understanding the samples because they were written in English (this was also evidenced by the time it took her to complete the task). In the case of the native English speaker, he reported that he was not an active social media user and requested further information on the set of tweets as he found those texts to be hard to understand. Overall, we discovered the native English speaker was too cautious when indicating which sentences were positive cases as he annotated 7 sentences as positive out of the 40 sentences (the gold standard data had 16 sentences tagged as positive sentences), whereas the rest of the annotators indicated 13-18 sentences were positive (Pharmacist3, who obtained the lowest score, was above that range as she annotated 24 sentences as positive).</p>
        <p>We decided to hire Pharmacist1 as she scored the best results, and out of Social1, Social2, and Pharmacist2, we decided to hire Pharmacist2, taking into account that the resulting corpus would require annotation at the entity level, for which Pharmacist2’s in-domain knowledge would be very valuable.</p>
      </sec>
      <sec>
        <title>Annotation</title>
        <p>Once the 2 pharmacists completed the annotation at sentence level, we focused on the entity level annotation, targeting the diseases, drugs, and symptoms. The results for Twitter and PubMed are shown in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>, using the relaxed constraints and strict constraints strategy described in the Methods section.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Detail of annotations in Twitter. The first column shows the element being evaluated. Columns 2-5 show the inter annotator agreement scores of pharmacist 1 (Ph1) and pharmacist 2 (Ph2) using relaxed and strict constraints. Columns 6 and 7 show the number of elements annotated by each pharmacist. Columns 8 and 9 show the number of matching elements between the pharmacists’ annotations using relaxed and strict constraints.</p>
          </caption>
          <table width="600" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="115"/>
            <col width="70"/>
            <col width="75"/>
            <col width="70"/>
            <col width="75"/>
            <col width="40"/>
            <col width="50"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Annotated element</td>
                <td>Ph1 <break/>(relaxed <break/>constraints)</td>
                <td>Ph2 <break/>(relaxed <break/>constraints)</td>
                <td>Ph1 <break/>(strict <break/>constraints)</td>
                <td>Ph2 <break/>(strict <break/>constraints)</td>
                <td>#Ph1</td>
                <td>#Ph2</td>
                <td>#Matches <break/>(relaxed <break/>constraints)</td>
                <td>#Matches <break/>(strict <break/>constraints)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>97.39</td>
                <td>98.72</td>
                <td>93.52</td>
                <td>94.80</td>
                <td>1111</td>
                <td>1096</td>
                <td>1082</td>
                <td>1039</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>50.86</td>
                <td>91.47</td>
                <td>46.12</td>
                <td>82.95</td>
                <td>464</td>
                <td>258</td>
                <td>236</td>
                <td>214</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>77.23</td>
                <td>76.71</td>
                <td>54.21</td>
                <td>53.84</td>
                <td>1164</td>
                <td>1172</td>
                <td>899</td>
                <td>631</td>
              </tr>
              <tr valign="top">
                <td>Outcome-negative</td>
                <td>63.27</td>
                <td>75.19</td>
                <td>43.02</td>
                <td>51.12</td>
                <td>795</td>
                <td>669</td>
                <td>503</td>
                <td>342</td>
              </tr>
              <tr valign="top">
                <td>Outcome-positive</td>
                <td>11.01</td>
                <td>40.00</td>
                <td>8.26</td>
                <td>30.00</td>
                <td>109</td>
                <td>30</td>
                <td>12</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>Reason-to-use</td>
                <td>55.82</td>
                <td>60.18</td>
                <td>44.66</td>
                <td>48.14</td>
                <td>842</td>
                <td>781</td>
                <td>470</td>
                <td>376</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>46.37</td>
                <td>8.96</td>
                <td>39.11</td>
                <td>7.56</td>
                <td>248</td>
                <td>1283</td>
                <td>115</td>
                <td>97</td>
              </tr>
              <tr valign="top">
                <td>Exemplification</td>
                <td>10.11</td>
                <td>64.77</td>
                <td>3.37</td>
                <td>21.59</td>
                <td>564</td>
                <td>88</td>
                <td>57</td>
                <td>19</td>
              </tr>
              <tr valign="top">
                <td>Modality</td>
                <td>56.92</td>
                <td>30.58</td>
                <td>49.57</td>
                <td>26.63</td>
                <td>585</td>
                <td>1089</td>
                <td>333</td>
                <td>290</td>
              </tr>
              <tr valign="top">
                <td>Person</td>
                <td>72.56</td>
                <td>58.55</td>
                <td>60.21</td>
                <td>48.58</td>
                <td>1709</td>
                <td>2118</td>
                <td>1240</td>
                <td>1029</td>
              </tr>
              <tr valign="top">
                <td>Polarity</td>
                <td>76.06</td>
                <td>52.43</td>
                <td>53.52</td>
                <td>36.89</td>
                <td>71</td>
                <td>103</td>
                <td>54</td>
                <td>38</td>
              </tr>
              <tr valign="top">
                <td>Sentiment</td>
                <td>72.48</td>
                <td>19.46</td>
                <td>60.92</td>
                <td>16.36</td>
                <td>476</td>
                <td>1773</td>
                <td>345</td>
                <td>290</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>64.18</td>
                <td>19.59</td>
                <td>44.03</td>
                <td>13.44</td>
                <td>134</td>
                <td>439</td>
                <td>86</td>
                <td>59</td>
              </tr>
              <tr valign="top">
                <td>Status</td>
                <td>59.41</td>
                <td>22.07</td>
                <td>45.94</td>
                <td>17.07</td>
                <td>542</td>
                <td>1459</td>
                <td>322</td>
                <td>249</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Detail of annotations in PubMed. The first column shows the element being evaluated. Columns 2-5 show the inter annotator agreement scores of pharmacist 1 (Ph1) and pharmacist 2 (Ph2) using relaxed and strict constraints. Columns 6 and 7 show the number of elements annotated by each pharmacist. Columns 8 and 9 show the number of matching elements between the pharmacists’ annotations using relaxed and strict constraints.</p>
          </caption>
          <table width="600" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="115"/>
            <col width="70"/>
            <col width="75"/>
            <col width="70"/>
            <col width="75"/>
            <col width="40"/>
            <col width="50"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Annotated element</td>
                <td>Ph1 <break/>(relaxed <break/>constraints)</td>
                <td>Ph2 <break/>(relaxed <break/>constraints)</td>
                <td>Ph1 <break/>(strict <break/>constraints)</td>
                <td>Ph2 <break/>(strict <break/>constraints)</td>
                <td>#Ph1</td>
                <td>#Ph2</td>
                <td>#Matches <break/>(relaxed <break/>constraints)</td>
                <td>#Matches <break/>(strict <break/>constraints)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>95.20</td>
                <td>97.90</td>
                <td>86.23</td>
                <td>88.67</td>
                <td>1271</td>
                <td>1236</td>
                <td>1210</td>
                <td>1096</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>64.18</td>
                <td>95.22</td>
                <td>53.41</td>
                <td>79.23</td>
                <td>1086</td>
                <td>732</td>
                <td>697</td>
                <td>580</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>85.13</td>
                <td>60.59</td>
                <td>70.61</td>
                <td>50.26</td>
                <td>558</td>
                <td>784</td>
                <td>475</td>
                <td>394</td>
              </tr>
              <tr valign="top">
                <td>Outcome-negative</td>
                <td>60.97</td>
                <td>64.86</td>
                <td>50.35</td>
                <td>53.56</td>
                <td>433</td>
                <td>407</td>
                <td>264</td>
                <td>218</td>
              </tr>
              <tr valign="top">
                <td>Outcome-positive</td>
                <td>56.25</td>
                <td>32.73</td>
                <td>43.75</td>
                <td>25.45</td>
                <td>32</td>
                <td>55</td>
                <td>18</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td>Reason-to-use</td>
                <td>62.87</td>
                <td>77.39</td>
                <td>47.10</td>
                <td>57.98</td>
                <td>1535</td>
                <td>1247</td>
                <td>965</td>
                <td>723</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>52.17</td>
                <td>9.38</td>
                <td>48.70</td>
                <td>8.75</td>
                <td>115</td>
                <td>640</td>
                <td>60</td>
                <td>56</td>
              </tr>
              <tr valign="top">
                <td>Exemplification</td>
                <td>0.64</td>
                <td>50.00</td>
                <td>0.32</td>
                <td>25.00</td>
                <td>311</td>
                <td>4</td>
                <td>2</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>Modality</td>
                <td>74.23</td>
                <td>50.52</td>
                <td>64.60</td>
                <td>43.96</td>
                <td>1370</td>
                <td>2013</td>
                <td>1017</td>
                <td>885</td>
              </tr>
              <tr valign="top">
                <td>Person</td>
                <td>63.93</td>
                <td>77.18</td>
                <td>56.08</td>
                <td>67.70</td>
                <td>1439</td>
                <td>1192</td>
                <td>920</td>
                <td>807</td>
              </tr>
              <tr valign="top">
                <td>Polarity</td>
                <td>25.00</td>
                <td>22.22</td>
                <td>25.00</td>
                <td>22.22</td>
                <td>16</td>
                <td>18</td>
                <td>4</td>
                <td>4</td>
              </tr>
              <tr valign="top">
                <td>Sentiment</td>
                <td>33.33</td>
                <td>1.96</td>
                <td>22.22</td>
                <td>1.31</td>
                <td>9</td>
                <td>153</td>
                <td>3</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>42.22</td>
                <td>33.33</td>
                <td>37.78</td>
                <td>29.82</td>
                <td>45</td>
                <td>57</td>
                <td>19</td>
                <td>17</td>
              </tr>
              <tr valign="top">
                <td>Status</td>
                <td>53.85</td>
                <td>2.52</td>
                <td>53.85</td>
                <td>2.52</td>
                <td>26</td>
                <td>555</td>
                <td>14</td>
                <td>14</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>By focusing on the results appearing in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>, we see that the agreement for the drugs in Twitter and PubMed is very high, which was expected, given our sampling strategy, although for diseases and symptoms the agreement score decreases noticeably in both Twitter and PubMed.</p>
        <p>When comparing the results for the relations (outcome-negative, outcome-positive, and reason-to-use), we saw low levels of agreement, with Twitter having lower results in all cases. Analyzing the number of annotations, it was clear that the use of the outcome-positive relation varied considerably between annotators, contributing to the low scores.</p>
        <p>The attributes “person” (in PubMed), “modality” (in both PubMed and Twitter), “polarity,” and “sentiment” (in Twitter) were the ones obtaining the best scores. On the other hand, the attributes “exemplification” (in both PubMed and Twitter), “sentiment,” “polarity” (in PubMed), and “duration” (in Twitter) were very prone to disagreements as these scores were the lowest in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>.</p>
        <p>By analyzing the discrepancies in the annotations, we discovered that the distinction between disease and symptom entities, although theoretically clear, was hard to disambiguate in a number of sentences. We can see that in the tweet “Is steroid induced psychosis a thing? (Like short term prednisone tx)” (see <xref ref-type="fig" rid="figure2">Figure 2</xref>), psychosis could be identified as a symptom [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>] or as a disease [<xref ref-type="bibr" rid="ref52">52</xref>]. Similarly, the entity comorbid obesity found in the PubMed sentence “The present case report of topiramate’s effect on comorbid obesity,” could also be understood as both a symptom and a disease [<xref ref-type="bibr" rid="ref53">53</xref>]. Interestingly, we observed in those examples that even if the chosen type of entity (disease and symptom) was different, the annotators agreed on the chosen CUI.</p>
        <p>In the case of relations, we discovered that both outcome-positive relation and reason-to-use relation were confounded in some cases. One example from Twitter is the sentence “How about trazodone, so I can just feel a little funny and then knock out and have the best sleep of my life,” where the drug, trazodone, and the symptom, the best sleep of my life, were annotated as such by both annotators, although one annotator indicated the relation between these entities was an outcome-positive relation whereas the other annotator marked it as a reason-to-use relation. The same was observed in PubMed sentences, as in “Because fatigue is a frequent symptom of depression and there is some evidence that treatment with an antidepressant improves fatigue in patients with fibromyalgia, we hypothesized that the antidepressant fluvoxamine might improve fatigue related to PBC and PSC.” In this sentence, the drug (fluvoxamine) and the symptom (fatigue) were correctly identified, as was the existence of a relation between the entities, but the chosen type of relation was different. This observation, combined with the fact that outcome-positive relation was the least used type of relation, helps in understanding the causes for the low inter annotator agreement score.</p>
        <p>Given the similarities between those concepts and the disagreements that we detected, we evaluated the inter annotator agreement score when conflating the concepts disease and symptom under “disease or symptom” concept. We also grouped together outcome-positive and reason-to-use relations under “benefit” relation. The use of those categories produced a noticeable improvement in the IAA scores. This strategy also improved the agreement scores for most of the attributes as can be seen in <xref ref-type="table" rid="table6">Table 6</xref> (for Twitter), and <xref ref-type="table" rid="table7">Table 7</xref> (for PubMed).</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Detail of annotations in Twitter using the conflation strategy. The first column shows the element being evaluated. Columns 2-5 show the inter annotator agreement scores of pharmacist 1 (Ph1) and pharmacist 2 (Ph2) using relaxed and strict constraints. Columns 6 and 7 show the number of elements annotated by each pharmacist. Columns 8 and 9 show the number of matching elements between pharmacists’ annotations using relaxed and strict constraints.</p>
          </caption>
          <table width="600" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="115"/>
            <col width="70"/>
            <col width="75"/>
            <col width="70"/>
            <col width="75"/>
            <col width="40"/>
            <col width="50"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Annotated element</td>
                <td>Ph1 <break/>(relaxed <break/>constraints)</td>
                <td>Ph2 <break/>(relaxed <break/>constraints)</td>
                <td>Ph1 <break/>(strict <break/>constraints)</td>
                <td>Ph2 <break/>(strict <break/>constraints)</td>
                <td>#Ph1</td>
                <td>#Ph2</td>
                <td>#Matches <break/>(relaxed <break/>constraints)</td>
                <td>#Matches <break/>(strict <break/>constraints)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>97.39</td>
                <td>98.72</td>
                <td>93.52</td>
                <td>94.80</td>
                <td>1111</td>
                <td>1096</td>
                <td>1082</td>
                <td>1039</td>
              </tr>
              <tr valign="top">
                <td>Disease or symptom</td>
                <td>82.25</td>
                <td>93.64</td>
                <td>61.36</td>
                <td>69.86</td>
                <td>1628</td>
                <td>1430</td>
                <td>1339</td>
                <td>999</td>
              </tr>
              <tr valign="top">
                <td>Outcome-negative</td>
                <td>67.30</td>
                <td>79.97</td>
                <td>46.29</td>
                <td>55.01</td>
                <td>795</td>
                <td>669</td>
                <td>535</td>
                <td>368</td>
              </tr>
              <tr valign="top">
                <td>Benefit</td>
                <td>68.14</td>
                <td>79.90</td>
                <td>52.37</td>
                <td>61.41</td>
                <td>951</td>
                <td>811</td>
                <td>648</td>
                <td>498</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>50.00</td>
                <td>9.66</td>
                <td>41.94</td>
                <td>8.11</td>
                <td>248</td>
                <td>1283</td>
                <td>124</td>
                <td>104</td>
              </tr>
              <tr valign="top">
                <td>Exemplification</td>
                <td>10.11</td>
                <td>64.77</td>
                <td>3.37</td>
                <td>21.59</td>
                <td>564</td>
                <td>88</td>
                <td>57</td>
                <td>19</td>
              </tr>
              <tr valign="top">
                <td>Modality</td>
                <td>64.44</td>
                <td>34.62</td>
                <td>54.53</td>
                <td>29.29</td>
                <td>585</td>
                <td>1089</td>
                <td>377</td>
                <td>319</td>
              </tr>
              <tr valign="top">
                <td>Person</td>
                <td>77.30</td>
                <td>62.37</td>
                <td>63.96</td>
                <td>51.61</td>
                <td>1709</td>
                <td>2118</td>
                <td>1321</td>
                <td>1093</td>
              </tr>
              <tr valign="top">
                <td>Polarity</td>
                <td>80.28</td>
                <td>55.34</td>
                <td>57.75</td>
                <td>39.81</td>
                <td>71</td>
                <td>103</td>
                <td>57</td>
                <td>41</td>
              </tr>
              <tr valign="top">
                <td>Sentiment</td>
                <td>75.00</td>
                <td>20.14</td>
                <td>62.61</td>
                <td>16.81</td>
                <td>476</td>
                <td>1773</td>
                <td>357</td>
                <td>298</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>67.16</td>
                <td>20.50</td>
                <td>47.01</td>
                <td>14.35</td>
                <td>134</td>
                <td>439</td>
                <td>90</td>
                <td>63</td>
              </tr>
              <tr valign="top">
                <td>Status</td>
                <td>61.81</td>
                <td>22.96</td>
                <td>48.15</td>
                <td>17.89</td>
                <td>542</td>
                <td>1459</td>
                <td>335</td>
                <td>261</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Detail of annotations in PubMed using the conflation strategy. The first column shows the element being evaluated. Columns 2-5 show the inter annotator agreement scores of pharmacist 1 (Ph1) and pharmacist 2 (Ph2) using relaxed and strict constraints. Columns 6 and 7 show the number of elements annotated by each pharmacist. Columns 8 and 9 show the number of matching elements between pharmacists’ annotations using relaxed and strict constraints.</p>
          </caption>
          <table width="600" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="115"/>
            <col width="70"/>
            <col width="75"/>
            <col width="70"/>
            <col width="75"/>
            <col width="40"/>
            <col width="50"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Annotated element</td>
                <td>Ph1 <break/>(relaxed <break/>constraints)</td>
                <td>Ph2 <break/>(relaxed <break/>constraints)</td>
                <td>Ph1 <break/>(strict <break/>constraints)</td>
                <td>Ph2 <break/>(strict <break/>constraints)</td>
                <td>#Ph1</td>
                <td>#Ph2</td>
                <td>#Matches <break/>(relaxed <break/>constraints)</td>
                <td>#Matches <break/>(strict <break/>constraints)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>95.20</td>
                <td>97.90</td>
                <td>86.23</td>
                <td>88.67</td>
                <td>1271</td>
                <td>1236</td>
                <td>1210</td>
                <td>1096</td>
              </tr>
              <tr valign="top">
                <td>Disease or symptom</td>
                <td>91.91</td>
                <td>99.67</td>
                <td>74.21</td>
                <td>80.47</td>
                <td>1644</td>
                <td>1516</td>
                <td>1511</td>
                <td>1220</td>
              </tr>
              <tr valign="top">
                <td>Outcome-negative</td>
                <td>81.52</td>
                <td>86.73</td>
                <td>65.82</td>
                <td>70.02</td>
                <td>433</td>
                <td>407</td>
                <td>353</td>
                <td>285</td>
              </tr>
              <tr valign="top">
                <td>Benefit</td>
                <td>77.41</td>
                <td>93.16</td>
                <td>56.86</td>
                <td>68.43</td>
                <td>1567</td>
                <td>1302</td>
                <td>1213</td>
                <td>891</td>
              </tr>
              <tr valign="top">
                <td>Duration</td>
                <td>53.91</td>
                <td>9.69</td>
                <td>50.43</td>
                <td>9.06</td>
                <td>115</td>
                <td>640</td>
                <td>62</td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>Exemplification</td>
                <td>0.64</td>
                <td>50.00</td>
                <td>0.32</td>
                <td>25.00</td>
                <td>311</td>
                <td>4</td>
                <td>2</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>Modality</td>
                <td>83.43</td>
                <td>56.78</td>
                <td>71.39</td>
                <td>48.58</td>
                <td>1370</td>
                <td>2013</td>
                <td>1143</td>
                <td>978</td>
              </tr>
              <tr valign="top">
                <td>Person</td>
                <td>71.58</td>
                <td>86.41</td>
                <td>62.13</td>
                <td>75.00</td>
                <td>1439</td>
                <td>1192</td>
                <td>1030</td>
                <td>894</td>
              </tr>
              <tr valign="top">
                <td>Polarity</td>
                <td>43.75</td>
                <td>38.89</td>
                <td>43.75</td>
                <td>38.89</td>
                <td>16</td>
                <td>18</td>
                <td>7</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>Sentiment</td>
                <td>33.33</td>
                <td>1.96</td>
                <td>22.22</td>
                <td>1.31</td>
                <td>9</td>
                <td>153</td>
                <td>3</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Severity</td>
                <td>53.33</td>
                <td>42.11</td>
                <td>46.67</td>
                <td>36.84</td>
                <td>45</td>
                <td>57</td>
                <td>24</td>
                <td>21</td>
              </tr>
              <tr valign="top">
                <td>Status</td>
                <td>53.85</td>
                <td>2.52</td>
                <td>53.85</td>
                <td>2.52</td>
                <td>26</td>
                <td>555</td>
                <td>14</td>
                <td>14</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Sample with the annotation of a drug, a disease and the relation between these concepts in a sentence from Twitter.</p>
          </caption>
          <graphic xlink:href="jmir_v3i2e24_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We produced a corpus of documents obtained using a very similar pipeline for both Twitter and PubMed. All the documents were filtered and double annotated by the same experts (pharmacists).</p>
        <p>TwiMed corpus shows that the drugs appearing less often in Twitter are those used to treat cancer (“docetaxel,” “bevacizumab,” “tamoxifen”) and epilepsy (“topiramate,” “carbamazepine”). On the other hand, the drugs used to treat attention deficit hyperactivity disorder (“modafinil,” “dextroamphetamine sulphate,” “lisdexamfetamine”) are the drugs having the most mentions in Twitter, whereas at the same time those are the drugs having the least mentions in PubMed sentences. This fact evidences that public concerns are not always aligned with the interests of the scientific community, and potential areas of research may emerge from those findings to understand the reasons and related outcomes.</p>
        <p>In our analysis, we observed a much higher balance for the drug mentions in PubMed than in Twitter. Factors such as the demographics of the user base in Twitter [<xref ref-type="bibr" rid="ref54">54</xref>], or the time when the messages were gathered [<xref ref-type="bibr" rid="ref26">26</xref>] are elements that should be studied in detail to measure their correlation with the different distribution of drug mentions.</p>
        <p>Given our main goal was to create a corpus covering mentions of drugs used to treat a number of different conditions, we applied a set of controlling mechanisms to extract the data from Twitter and from PubMed, and researchers should be aware that the drugs in this corpus have a different distribution in the original sources of information than the distributions presented here, and follow-up quantitative studies may be needed to understand those differences.</p>
        <p>We believe the reason for the high agreement in the annotation of drugs is the use of a closed set of drug names that both annotators knew beforehand. We can see, however, that there is a lower level of agreement for the annotation of symptoms and diseases and the main reason would be that the annotators had to identify these mentions, and there is an open list of entities that can be found in the texts, not to mention that these entities could be presented using an exemplification. In addition, in some contexts a disease can be considered as a symptom, and the short nature of the sentences can act as a factor in confounding the nature of these entities. Similarly, more subjective concepts such as the duration attribute or the exemplification attribute show a low level of agreement probably because the annotators had to interpret these elements by themselves and in some cases a certain level of subjectivity influenced the decision.</p>
        <p>Researchers should be aware that this corpus is not devised to capture everything about the selected set of 30 drugs, and there are a number of drug names appearing in the selected set of sentences which were not annotated because these were not included in the target set of drugs. Similarly, there are DDIs and other relations that the corpus does not include because of our constraints. However, we believe the provided sample can help in training NLP systems to capture more information. Nonetheless, we provide a set of semantically correct annotations that can be used in NLP studies.</p>
        <p>Our annotation also confirmed that there is lower agreement in the annotation of tweets than in the annotation of PubMed sentences, showing the noisy nature of Twitter [<xref ref-type="bibr" rid="ref13">13</xref>]. Moreover, when applying our conflation strategy aimed at resolving disagreements, we observed that these differences still remained.</p>
        <p>We noticed that a number of those disagreements were caused when confounding “diseases” and “symptoms.” Similarly, acronyms appearing in documents from PubMed tend to be explained the first time they are presented, which does not necessarily have to be when the drugs and related symptoms and diseases are discussed. In our case we allowed the annotators to access the full articles during the annotation process to reduce the impact of this problem. Nonetheless, we believe the use of acronyms is a potential source of confusion in texts where the context is scarce, and this potential problem should be handled.</p>
        <p>Additionally, another noticeable finding when using the same guidelines is the fact that disagreements appear in similar categories for both Twitter and PubMed. <xref ref-type="fig" rid="figure3">Figure 3</xref> shows an example where the string “eyelids are itchy” was annotated with the duration of “regular” by one annotator (to indicate that there is a continued lasting span), whereas the other annotator chose “irregular” for the duration attribute (to indicate that there is no pattern in the lasting span).</p>
        <p><xref ref-type="fig" rid="figure3">Figure 3</xref> also shows an example where the annotation for the attribute “exemplification” differs between annotators as the string “eyelids are itchy” was annotated as an exemplification by only one of the two annotators.</p>
        <p>Besides PubMed-Twitter comparative studies, our corpus is of potential interest for researchers aiming at finding sentences containing information on the drugs, symptoms, and diseases. We believe this corpus can become a useful resource to discern informational sentences in the area of pharmacovigilance as other researchers can use the sentences we included in this dataset to create classifiers targeting the correct identification of sentences reporting drug-use.</p>
        <p>This dataset shows that for similar events coming from very different data sources, the way in which people communicate the same messages has noticeable differences. This corpus can provide useful insights to science communicators and public institutions for adapting their messages when addressing the general public so that the information can attract more attention. One such example would be the use of social media by official health institutions, where most of the messages are more formal than average social media messages, as a means to reach a wider audience during health promotion and disease prevention campaigns as the wording may affect the impact of the messages.</p>
        <p>We believe combining the information contained in scientific reports, of high quality and very trustworthy, together with the information coming from social media messages, which is global, has a high volume, and is up-to-date, should be taken into account when building pharmacovigilance systems. We hope this corpus can help researchers interested in combining the potential of those data sources.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Sample of an annotation where “duration” and “exemplification” attributes are used.</p>
          </caption>
          <graphic xlink:href="jmir_v3i2e24_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We have presented a pharmacovigilance corpus that, to our knowledge, is the first corpus that allows researchers to perform direct comparative studies toward understanding the differences between drug reports in Twitter and PubMed.</p>
        <p>Our corpus contains annotations for drugs, symptoms, and diseases; their attributes (polarity, person, modality, exemplification, duration, severity, status, sentiment); and the relations between the annotated entities (reason-to-use, outcome-negative, and outcome-positive).</p>
        <p>We also identified the source of a number of disagreements for the annotated entities and relations, and proposed a conflation strategy to resolve those discrepancies. That approach resulted in higher agreement scores for most entities and relations.</p>
        <p>We hope that given the comprehensive set of drug names and the annotated entities and relations included in this corpus, it can become a standard resource to compare results from different pharmacovigilance studies, especially in the area of NLP as it can help in training to recognize the entities and relations in the texts. Similarly, this corpus can help in comparing the performance of NLP tools across the 2 different linguistic registers (formal and informal).</p>
        <p>In summary, we present a comparable corpus for pharmacovigilance studies and the annotation scheme we devised. This work is presented to the research community in the belief that such resources can help in this rapidly growing area.</p>
        <p>The corpus we release, available as <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>, contains the annotations for all the entities, relations, and attributes where both annotators agreed. Additionally, we provide the tools to obtain the raw tweets used in the annotation, to comply with Twitter’s terms of service [<xref ref-type="bibr" rid="ref55">55</xref>], and we also provide the tools to preprocess the raw sentences from both Twitter and PubMed to reuse the released annotations.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>TwiMed corpus.</p>
        <media xlink:href="jmir_v3i2e24_app1.zip" xlink:title="ZIP File (Zip Archive), 2MB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>Annotation guidelines used to prepare TwiMed corpus.</p>
        <media xlink:href="jmir_v3i2e24_app2.pdf" xlink:title="PDF File (Adobe PDF File), 139KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADE</term>
          <def>
            <p>adverse drug events</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ASCII</term>
          <def>
            <p>American standard code for information interchange</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CLEF</term>
          <def>
            <p>Conference and Labs of the Evaluation Forum</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CUI</term>
          <def>
            <p>concept unique identifier</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DDI</term>
          <def>
            <p>drug-drug interaction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">IAA</term>
          <def>
            <p>inter annotator agreement</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ShARe</term>
          <def>
            <p>shared annotated resources</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research project was supported by a grant from the Japanese Ministry of Education, Culture, Sports, Science and Technology (MEXT). We would also like to thank Ana Salto Hurtado and Marta Gómez Márquez de Prado for their help in annotating the sentences. We want to thank Ramiro Aparicio Gallardo for his technical support. Finally, we also want to thank Dr Nut Limsopatham and Dr Yuka Tateisi for their suggestions while developing the guidelines.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hakala</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Van Landeghem</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Salakoski</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Van de Peer</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Ginter</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Application of the EVEX resource to event extraction and network construction: Shared Task entry and result analysis</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2015</year>  
        <conf-name>BioNLP Shared Task 2013</conf-name>
        <conf-date>August 9, 2013</conf-date>
        <conf-loc>Sofia, Bulgaria</conf-loc>
        <fpage>S3</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-16-S16-S3"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-16-S16-S3</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Freifeld</surname>
            <given-names>CC</given-names>
          </name>
          <name name-style="western">
            <surname>Brownstein</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Menone</surname>
            <given-names>CM</given-names>
          </name>
          <name name-style="western">
            <surname>Bao</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Filice</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Kass-Hout</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Dasgupta</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Digital drug safety surveillance: monitoring pharmaceutical products in twitter</article-title>
        <source>Drug Safety</source>  
        <year>2014</year>  
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>343</fpage>  
        <lpage>50</lpage> </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bian</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Topaloglu</surname>
            <given-names>U</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Towards large-scale twitter mining for drug-related adverse events</article-title>
        <year>2012</year>  
        <conf-name>Proceedings of the 2012 international workshop on Smart health and wellbeing</conf-name>
        <conf-date>October 29, 2012</conf-date>
        <conf-loc>Maui, HI</conf-loc>
        <fpage>25</fpage>  
        <lpage>32</lpage>  
        <pub-id pub-id-type="doi">10.1145/2389707.2389713</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarker</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Ginn</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Nikfarjam</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Jayaraman</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Upadhaya</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Utilizing social media data for pharmacovigilance: a review</article-title>
        <source>J Biomed Inform</source>  
        <year>2015</year>  
        <month>04</month>  
        <volume>54</volume>  
        <fpage>202</fpage>  
        <lpage>12</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00036-2"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2015.02.004</pub-id>
        <pub-id pub-id-type="medline">25720841</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(15)00036-2</pub-id>
        <pub-id pub-id-type="pmcid">PMC4408239</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nikfarjam</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Sarker</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Ginn</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2015</year>  
        <month>05</month>  
        <volume>22</volume>  
        <issue>3</issue>  
        <fpage>671</fpage>  
        <lpage>81</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25755127"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id>
        <pub-id pub-id-type="medline">25755127</pub-id>
        <pub-id pub-id-type="pii">ocu041</pub-id>
        <pub-id pub-id-type="pmcid">PMC4457113</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Björne</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Salakoski</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Biomedical Event Extraction for Diverse Corpora</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2015</year>  
        <volume>16</volume>  
        <issue>Suppl 16</issue>  
        <fpage>S4</fpage>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-16-S16-S4</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Miwa</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Thompson</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>McNaught</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Kell</surname>
            <given-names>DB</given-names>
          </name>
          <name name-style="western">
            <surname>Ananiadou</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Extracting semantically enriched events from biomedical literature</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2012</year>  
        <month>05</month>  
        <day>23</day>  
        <volume>13</volume>  
        <fpage>108</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-108"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-13-108</pub-id>
        <pub-id pub-id-type="pii">1471-2105-13-108</pub-id>
        <pub-id pub-id-type="pmcid">PMC3464657</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Miwa</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ananiadou</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>NaCTeM EventMine for BioNLP 2013 CG and PC tasks</article-title>
        <year>2013</year>  
        <conf-name>Proceedings of the BioNLP Shared Task 2013 Workshop</conf-name>
        <conf-date>2013</conf-date>
        <conf-loc>Sofia, Bulgaria</conf-loc>
        <fpage>94</fpage>  
        <lpage>8</lpage> </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Herrero-Zazo</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Segura-Bedmar</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Martínez</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Declerck</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>The DDI corpus: an annotated corpus with pharmacological substances and drug-drug interactions</article-title>
        <source>J Biomed Inform</source>  
        <year>2013</year>  
        <month>10</month>  
        <volume>46</volume>  
        <issue>5</issue>  
        <fpage>914</fpage>  
        <lpage>20</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00112-3"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2013.07.011</pub-id>
        <pub-id pub-id-type="medline">23906817</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(13)00112-3</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gurulingappa</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Rajput</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Roberts</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Fluck</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Hofmann-Apitius</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Toldo</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Development of a benchmark corpus to support the automatic extraction of drug-related adverse effects from medical case reports</article-title>
        <source>J Biomed Inform</source>  
        <year>2012</year>  
        <month>10</month>  
        <volume>45</volume>  
        <issue>5</issue>  
        <fpage>885</fpage>  
        <lpage>92</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00061-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2012.04.008</pub-id>
        <pub-id pub-id-type="medline">22554702</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(12)00061-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nawaz</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Thompson</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>McNaught</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ananiadou</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Meta-knowledge annotation of bio-events</article-title>
        <year>2010</year>  
        <conf-name>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC 2010)</conf-name>
        <conf-date>May 17-23, 2010</conf-date>
        <conf-loc>Valletta, Malta</conf-loc>
        <fpage>2498</fpage>  
        <lpage>505</lpage> </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Elhadad</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Savova</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <source>CHPC</source>  
        <access-date>2017-04-24</access-date>
        <comment>ShARe/CLEF eHealth 2013 shared task: guidelines for the annotation of disorders in clinical notes 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://blulab.chpc.utah.edu/sites/default/files/ShARe_Guidelines_CLEF_2013.pdf">http://blulab.chpc.utah.edu/sites/default/files/ShARe_Guidelines_CLEF_2013.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6pxg52DD9"/></comment> </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Baldwin</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Cook</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Lui</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>MacKinlay</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>How noisy social media text, how diffrnt social media sources?</article-title>
        <year>2013</year>  
        <conf-name>International Joint Conference on Natural Language Processing</conf-name>
        <conf-date>October 14-18, 2013</conf-date>
        <conf-loc>Nagoya, Japan</conf-loc>
        <fpage>356</fpage>  
        <lpage>64</lpage> </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lippincott</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Séaghdha</surname>
            <given-names>DÓ</given-names>
          </name>
          <name name-style="western">
            <surname>Korhonen</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Exploring subdomain variation in biomedical language</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2011</year>  
        <month>05</month>  
        <day>27</day>  
        <volume>12</volume>  
        <fpage>212</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-212"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-12-212</pub-id>
        <pub-id pub-id-type="medline">21619603</pub-id>
        <pub-id pub-id-type="pii">1471-2105-12-212</pub-id>
        <pub-id pub-id-type="pmcid">PMC3118171</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Haerian</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Salmasian</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Harpaz</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Chase</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Friedman</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>A drug-adverse event extraction algorithm to support pharmacovigilance knowledge mining from PubMed citations</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2011</year>  
        <volume>2011</volume>  
        <fpage>1464</fpage>  
        <lpage>70</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195210"/>
        </comment>  
        <pub-id pub-id-type="medline">22195210</pub-id>
        <pub-id pub-id-type="pmcid">PMC3243206</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ohta</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Tateisi</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Tsujii</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>GENIA corpus—a semantically annotated corpus for bio-textmining</article-title>
        <source>Bioinformatics</source>  
        <year>2003</year>  
        <month>07</month>  
        <day>10</day>  
        <volume>19</volume>  
        <issue>Suppl 1</issue>  
        <fpage>i180</fpage>  
        <lpage>i182</lpage>  
        <pub-id pub-id-type="doi">10.1093/bioinformatics/btg1023</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
        <source>Khresmoi</source>  
        <access-date>2011-11-06</access-date>
        <comment>Khresmoi - medical information analysis and retrieval 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.khresmoi.eu">http://www.khresmoi.eu</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="62zteCKmX"/></comment> </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Goeuriot</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Kelly</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Jones</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Zuccon</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Suominen</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Hanbury</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Mueller</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Leveling</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Creation of a new evaluation benchmark for information retrieval targeting patient information needs</article-title>
        <year>2013</year>  
        <conf-name>The Fifth International Workshop on Evaluating Information Access</conf-name>
        <conf-date>June 18, 2013</conf-date>
        <conf-loc>Tokyo, Japan</conf-loc></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Frost</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Okun</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Vaughan</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Heywood</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wicks</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Patient-reported outcomes as a source of evidence in off-label prescribing: analysis of data from PatientsLikeMe</article-title>
        <source>J Med Internet Res</source>  
        <year>2011</year>  
        <month>01</month>  
        <day>21</day>  
        <volume>13</volume>  
        <issue>1</issue>  
        <fpage>e6</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2011/1/e6/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.1643</pub-id>
        <pub-id pub-id-type="medline">21252034</pub-id>
        <pub-id pub-id-type="pii">v13i1e6</pub-id>
        <pub-id pub-id-type="pmcid">PMC3221356</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Benton</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Ungar</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Hill</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hennessy</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Mao</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Chung</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Leonard</surname>
            <given-names>CE</given-names>
          </name>
          <name name-style="western">
            <surname>Holmes</surname>
            <given-names>JH</given-names>
          </name>
        </person-group>
        <article-title>Identifying potential adverse effects using the web: a new approach to medical hypothesis generation</article-title>
        <source>J Biomed Inform</source>  
        <year>2011</year>  
        <month>12</month>  
        <volume>44</volume>  
        <issue>6</issue>  
        <fpage>989</fpage>  
        <lpage>96</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(11)00123-7"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2011.07.005</pub-id>
        <pub-id pub-id-type="medline">21820083</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(11)00123-7</pub-id>
        <pub-id pub-id-type="pmcid">PMC4404640</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Alvaro</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Conway</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Doan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Lofi</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Overington</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Collier</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Crowdsourcing Twitter annotations to identify first-hand experiences of prescription drug use</article-title>
        <source>J Biomed Inform</source>  
        <year>2015</year>  
        <month>12</month>  
        <volume>58</volume>  
        <fpage>280</fpage>  
        <lpage>7</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00241-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2015.11.004</pub-id>
        <pub-id pub-id-type="medline">26556646</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(15)00241-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Fabbri</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Rosenbloom</surname>
            <given-names>ST</given-names>
          </name>
          <name name-style="western">
            <surname>Malin</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>A scalable framework to detect personal health mentions on twitter</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <month>06</month>  
        <day>05</day>  
        <volume>17</volume>  
        <issue>6</issue>  
        <fpage>e138</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2015/6/e138/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4305</pub-id>
        <pub-id pub-id-type="medline">26048075</pub-id>
        <pub-id pub-id-type="pii">v17i6e138</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526910</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
        <source>Twitter</source>  
        <access-date>2015-04-28</access-date>
        <comment>About company 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://about.twitter.com/company">http://about.twitter.com/company</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6Y8Gpanwp"/></comment> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
        <source>Internet-Live-Stats</source>  
        <access-date>2016-10-13</access-date>
        <comment>Twitter usage statistics 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.internetlivestats.com/twitter-statistics/">http://www.internetlivestats.com/twitter-statistics/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lEjp8QRn"/></comment> </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Terras</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Warwick</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>How twitter is studied in the medical professions: a classification of twitter papers indexed in PubMed</article-title>
        <source>Med 2 0</source>  
        <year>2013</year>  
        <volume>2</volume>  
        <issue>2</issue>  
        <fpage>e2</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.medicine20.com/2013/2/e2/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/med20.2269</pub-id>
        <pub-id pub-id-type="medline">25075237</pub-id>
        <pub-id pub-id-type="pii">v2i2e2</pub-id>
        <pub-id pub-id-type="pmcid">PMC4084770</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hanson</surname>
            <given-names>CL</given-names>
          </name>
          <name name-style="western">
            <surname>Burton</surname>
            <given-names>SH</given-names>
          </name>
          <name name-style="western">
            <surname>Giraud-Carrier</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>West</surname>
            <given-names>JH</given-names>
          </name>
          <name name-style="western">
            <surname>Barnes</surname>
            <given-names>MD</given-names>
          </name>
          <name name-style="western">
            <surname>Hansen</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Tweaking and tweeting: exploring Twitter for nonmedical use of a psychostimulant drug (Adderall) among college students</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <month>04</month>  
        <day>17</day>  
        <volume>15</volume>  
        <issue>4</issue>  
        <fpage>e62</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2013/4/e62/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2503</pub-id>
        <pub-id pub-id-type="medline">23594933</pub-id>
        <pub-id pub-id-type="pii">v15i4e62</pub-id>
        <pub-id pub-id-type="pmcid">PMC3636321</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sarker</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Ginn</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Scotch</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Malone</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Social media mining for toxicovigilance: automatic monitoring of prescription medication abuse from twitter</article-title>
        <source>Drug Saf</source>  
        <year>2016</year>  
        <fpage>231</fpage>  
        <lpage>240</lpage>  
        <pub-id pub-id-type="doi">10.1007/s40264-015-0379-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Plachouras</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Leidner</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>Garrow</surname>
            <given-names>AG</given-names>
          </name>
        </person-group>
        <article-title>Quantifying self-reported adverse drug events on twitter: signal and topic analysis</article-title>
        <year>2016</year>  
        <conf-name>International Conference on Social Media &#38; Society</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>London</conf-loc>
        <pub-id pub-id-type="doi">10.1145/2930971.2930977</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rastegar-Mojarad</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ye</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Wall</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Murali</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Collecting and analyzing patient experiences of health care from social media</article-title>
        <source>JMIR Res Protoc</source>  
        <year>2015</year>  
        <month>07</month>  
        <day>02</day>  
        <volume>4</volume>  
        <issue>3</issue>  
        <fpage>e78</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.researchprotocols.org/2015/3/e78/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/resprot.3433</pub-id>
        <pub-id pub-id-type="medline">26137885</pub-id>
        <pub-id pub-id-type="pii">v4i3e78</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526973</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Weeg</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Schwartz</surname>
            <given-names>HA</given-names>
          </name>
          <name name-style="western">
            <surname>Hill</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Merchant</surname>
            <given-names>RM</given-names>
          </name>
          <name name-style="western">
            <surname>Arango</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Ungar</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Using twitter to measure public discussion of diseases: a case study</article-title>
        <source>JMIR Public Health Surveill</source>  
        <year>2015</year>  
        <month>06</month>  
        <day>26</day>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>e6</fpage>  
        <pub-id pub-id-type="doi">10.2196/publichealth.3953</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Du</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Rachul</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>Gordie Howe's “Miraculous Treatment”: case study of Twitter users' reactions to a sport celebrity's stem cell treatment</article-title>
        <source>JMIR Public Health Surveill</source>  
        <year>2016</year>  
        <volume>2</volume>  
        <issue>1</issue>  
        <fpage>e8</fpage>  
        <pub-id pub-id-type="doi">10.2196/publichealth.5264</pub-id></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Segura-Bedmar</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Martínez</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Revert</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Moreno-Schneider</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Exploring Spanish health social media for detecting drug effects</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2015</year>  
        <month>06</month>  
        <volume>15 Suppl 2</volume>  
        <fpage>S6</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.biomedcentral.com/1472-6947/15/S2/S6"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-15-S2-S6</pub-id>
        <pub-id pub-id-type="medline">26100267</pub-id>
        <pub-id pub-id-type="pii">1472-6947-15-S2-S6</pub-id>
        <pub-id pub-id-type="pmcid">PMC4474583</pub-id></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Deléger</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Grouin</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Zweigenbaum</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Extracting medication information from French clinical texts</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2010</year>  
        <volume>160</volume>  
        <issue>Pt 2</issue>  
        <fpage>949</fpage>  
        <lpage>53</lpage>  
        <pub-id pub-id-type="medline">20841824</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Polepalli</surname>
            <given-names>RB</given-names>
          </name>
          <name name-style="western">
            <surname>Belknap</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Frid</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>West</surname>
            <given-names>DP</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Automatically recognizing medication and adverse event information from Food and Drug Administration's Adverse Event Reporting System narratives</article-title>
        <source>JMIR Med Inform</source>  
        <year>2014</year>  
        <month>06</month>  
        <day>27</day>  
        <volume>2</volume>  
        <issue>1</issue>  
        <fpage>e10</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://medinform.jmir.org/2014/1/e10/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/medinform.3022</pub-id>
        <pub-id pub-id-type="medline">25600332</pub-id>
        <pub-id pub-id-type="pii">v2i1e10</pub-id>
        <pub-id pub-id-type="pmcid">PMC4288072</pub-id></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hoffman</surname>
            <given-names>KB</given-names>
          </name>
          <name name-style="western">
            <surname>Dimbil</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Tatonetti</surname>
            <given-names>NP</given-names>
          </name>
          <name name-style="western">
            <surname>Kyle</surname>
            <given-names>RF</given-names>
          </name>
        </person-group>
        <article-title>A pharmacovigilance signaling system based on FDA regulatory action and post-marketing adverse event reports</article-title>
        <source>Drug Saf</source>  
        <year>2016</year>  
        <month>06</month>  
        <volume>39</volume>  
        <issue>6</issue>  
        <fpage>561</fpage>  
        <lpage>75</lpage>  
        <pub-id pub-id-type="doi">10.1007/s40264-016-0409-x</pub-id>
        <pub-id pub-id-type="medline">26946292</pub-id>
        <pub-id pub-id-type="pii">10.1007/s40264-016-0409-x</pub-id></nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wongchaisuwat</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Klabjan</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Jonnalagadda</surname>
            <given-names>SR</given-names>
          </name>
        </person-group>
        <article-title>A semi-supervised learning approach to enhance health care community-based question answering: a case study in alcoholism</article-title>
        <source>JMIR Med Inform</source>  
        <year>2016</year>  
        <month>08</month>  
        <day>02</day>  
        <volume>4</volume>  
        <issue>3</issue>  
        <fpage>e24</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://medinform.jmir.org/2016/3/e24/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/medinform.5490</pub-id>
        <pub-id pub-id-type="medline">27485666</pub-id>
        <pub-id pub-id-type="pii">v4i3e24</pub-id>
        <pub-id pub-id-type="pmcid">PMC4987493</pub-id></nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Abacha</surname>
            <given-names>AB</given-names>
          </name>
          <name name-style="western">
            <surname>Zweigenbaum</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>MEANS: A medical question-answering system combining NLP techniques and semantic Web technologies</article-title>
        <source>Inf Process Manage</source>  
        <year>2015</year>  
        <fpage>570</fpage>  
        <lpage>94</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ipm.2015.04.006</pub-id></nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Leaman</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Wojtulewicz</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Sullivan</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Skariah</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Towards internet-age pharmacovigilance: extracting adverse drug reactions from user posts to health-related social networks</article-title>
        <year>2010</year>  
        <conf-name>Proceedings of the 2010 Workshop on Biomedical Natural Language Processing</conf-name>
        <conf-date>July 15, 2010</conf-date>
        <conf-loc>Uppsala, Sweden</conf-loc>
        <fpage>117</fpage>  
        <lpage>25</lpage> </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sampathkumar</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Luo</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Mining adverse drug reactions from online healthcare forums using hidden Markov model</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2014</year>  
        <month>10</month>  
        <day>23</day>  
        <volume>14</volume>  
        <fpage>91</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-14-91"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-14-91</pub-id>
        <pub-id pub-id-type="medline">25341686</pub-id>
        <pub-id pub-id-type="pii">1472-6947-14-91</pub-id>
        <pub-id pub-id-type="pmcid">PMC4283122</pub-id></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
        <source>Europe PMC</source>  
        <access-date>2017-04-24</access-date>
        <comment>RESTful Web Service 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/RestfulWebService">http://europepmc.org/RestfulWebService</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6pxubKVDG"/></comment> </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
        <source>ASU</source>  
        <access-date>2017-04-24</access-date>
        <comment>Arizona Disease Corpus Annotation Guidelines 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://diego.asu.edu/downloads/AZDCAnnotationGuidelines_v013.pdf">http://diego.asu.edu/downloads/AZDCAnnotationGuidelines_v013.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6pxufeemu"/></comment> </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Stenetorp</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Pyysalo</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Topić</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Ohta</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Ananiadou</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Tsujii</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>BRAT: a web-based tool for NLP-assisted text annotation</article-title>
        <year>2012</year>  
        <conf-name>Conference of the European Chapter of the Association for Computational Linguistics</conf-name>
        <conf-date>April 23-27, 2012</conf-date>
        <conf-loc>Avignon, France</conf-loc>
        <fpage>102</fpage>  
        <lpage>7</lpage> </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ogren</surname>
            <given-names>PV</given-names>
          </name>
        </person-group>
        <article-title>Knowtator: a plug-in for creating training and evaluation data sets for biomedical natural language systems</article-title>
        <year>2006</year>  
        <conf-name>International Protégé Conference</conf-name>
        <conf-date>July 23-26, 2006</conf-date>
        <conf-loc>Stanford, California</conf-loc>
        <fpage>73</fpage>  
        <lpage>6</lpage> </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
        <source>SIDER</source>  
        <access-date>2013-01-23</access-date>
        <comment>SIDER Side Effect Resource 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://sideeffects.embl.de">http://sideeffects.embl.de</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6DsJnvdbl"/></comment> </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
        <source>BioPortal</source>  
        <access-date>2017-04-24</access-date>
        <comment>Medical dictionary for regulatory activities 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bioportal.bioontology.org/ontologies/MEDDRA">http://bioportal.bioontology.org/ontologies/MEDDRA</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6pxvIyd8g"/></comment> </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dhombres</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Bodenreider</surname>
            <given-names>O</given-names>
          </name>
        </person-group>
        <article-title>Interoperability between phenotypes in research and healthcare terminologies—investigating partial mappings between HPO and SNOMED CT</article-title>
        <source>J Biomed Semantics</source>  
        <year>2016</year>  
        <volume>7</volume>  
        <fpage>3</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-016-0047-3"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s13326-016-0047-3</pub-id>
        <pub-id pub-id-type="medline">26865946</pub-id>
        <pub-id pub-id-type="pii">47</pub-id>
        <pub-id pub-id-type="pmcid">PMC4748471</pub-id></nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mizuno</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ogishima</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Nishigori</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Jamieson</surname>
            <given-names>DG</given-names>
          </name>
          <name name-style="western">
            <surname>Verspoor</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Tanaka</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Yaegashi</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Nakaya</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>The pre-eclampsia ontology: a disease ontology representing the domain knowledge specific to pre-eclampsia</article-title>
        <source>PLoS One</source>  
        <year>2016</year>  
        <volume>11</volume>  
        <issue>10</issue>  
        <fpage>e0162828</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0162828"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0162828</pub-id>
        <pub-id pub-id-type="medline">27788142</pub-id>
        <pub-id pub-id-type="pii">PONE-D-16-09448</pub-id>
        <pub-id pub-id-type="pmcid">PMC5082890</pub-id></nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Roberts</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Gaizauskas</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Hepple</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Demetriou</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Setzer</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Roberts</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Semantic annotation of clinical text: the CLEF corpus</article-title>
        <year>2008</year>  
        <conf-name>LREC</conf-name>
        <conf-date>May 26, 2008</conf-date>
        <conf-loc>Marrakech, Morocco</conf-loc>
        <fpage>19</fpage>  
        <lpage>26</lpage> </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hripcsak</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Rothschild</surname>
            <given-names>AS</given-names>
          </name>
        </person-group>
        <article-title>Agreement, the f-measure, and reliability in information retrieval</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2005</year>  
        <month>01</month>  
        <volume>12</volume>  
        <issue>3</issue>  
        <fpage>296</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=15684123"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1733</pub-id>
        <pub-id pub-id-type="medline">15684123</pub-id>
        <pub-id pub-id-type="pii">M1733</pub-id>
        <pub-id pub-id-type="pmcid">PMC1090460</pub-id></nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tamminga</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Carlsson</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Partial dopamine agonists and dopaminergic stabilizers, in the treatment of psychosis</article-title>
        <source>Curr Drug Targets CNS Neurol Disord</source>  
        <year>2002</year>  
        <month>04</month>  
        <volume>1</volume>  
        <issue>2</issue>  
        <fpage>141</fpage>  
        <lpage>7</lpage>  
        <pub-id pub-id-type="medline">12769623</pub-id></nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hunter</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Grealish</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Dowling</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Improving quality of life for adolescents with psychosis</article-title>
        <source>Mental Health Practice</source>  
        <year>2010</year>  
        <fpage>32</fpage>  
        <lpage>5</lpage>  
        <pub-id pub-id-type="doi">10.7748/mhp2010.04.13.7.32.c7650</pub-id></nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Perlman</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Kotov</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Fu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bromet</surname>
            <given-names>EJ</given-names>
          </name>
          <name name-style="western">
            <surname>Fochtmann</surname>
            <given-names>LJ</given-names>
          </name>
          <name name-style="western">
            <surname>Medeiros</surname>
            <given-names>H</given-names>
          </name>
          <collab>Genomic Psychiatry Cohort Consortium</collab>
          <name name-style="western">
            <surname>Pato</surname>
            <given-names>MT</given-names>
          </name>
          <name name-style="western">
            <surname>Pato</surname>
            <given-names>CN</given-names>
          </name>
        </person-group>
        <article-title>Symptoms of psychosis in schizophrenia, schizoaffective disorder, and bipolar disorder: a comparison of African Americans and Caucasians in the Genomic Psychiatry Cohort</article-title>
        <source>Am J Med Genet B Neuropsychiatr Genet</source>  
        <year>2016</year>  
        <month>06</month>  
        <volume>171</volume>  
        <issue>4</issue>  
        <fpage>546</fpage>  
        <lpage>55</lpage>  
        <pub-id pub-id-type="doi">10.1002/ajmg.b.32409</pub-id>
        <pub-id pub-id-type="medline">26663585</pub-id></nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Khaodhiar</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>McCowen</surname>
            <given-names>KC</given-names>
          </name>
          <name name-style="western">
            <surname>Blackburn</surname>
            <given-names>GL</given-names>
          </name>
        </person-group>
        <article-title>Obesity and its comorbid conditions</article-title>
        <source>Clin Cornerstone</source>  
        <year>1999</year>  
        <volume>2</volume>  
        <issue>3</issue>  
        <fpage>17</fpage>  
        <lpage>31</lpage>  
        <pub-id pub-id-type="medline">10696282</pub-id></nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Duggan</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>Pew Research Center</source>  
        <access-date>2016-03-22</access-date>
        <comment>Mobile messaging and social media 2015 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.pewinternet.org/2015/08/19/mobile-messaging-and-social-media-2015/">http://www.pewinternet.org/2015/08/19/mobile-messaging-and-social-media-2015/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6gCZOB0Lz"/></comment> </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
        <source>Twitter</source>  
        <access-date>2016-10-04</access-date>
        <comment>Twitter Developer Documentation 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://dev.twitter.com/overview/terms/agreement-and-policy">https://dev.twitter.com/overview/terms/agreement-and-policy</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6l0jyN5el"/></comment> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
