<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i2e19273</article-id>
      <article-id pub-id-type="pmid">32427106</article-id>
      <article-id pub-id-type="doi">10.2196/19273</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Tracking Social Media Discourse About the COVID-19 Pandemic: Development of a Public Coronavirus Twitter Data Set</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jonas</surname>
            <given-names>Adam</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Da Silva</surname>
            <given-names>Edson</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Saud</surname>
            <given-names>Muhammad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Emily</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2363-9889</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Lerman</surname>
            <given-names>Kristina</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5071-0575</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ferrara</surname>
            <given-names>Emilio</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Information Sciences Institute</institution>
            <institution>University of Southern California</institution>
            <addr-line>4676 Admiralty Way, #1001</addr-line>
            <addr-line>Marina del Rey, CA, 90292</addr-line>
            <country>United States</country>
            <phone>1 310 448 8661</phone>
            <email>emiliofe@usc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1942-2831</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Information Sciences Institute</institution>
        <institution>University of Southern California</institution>
        <addr-line>Marina del Rey, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Emilio Ferrara <email>emiliofe@usc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Apr-Jun</season>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <volume>6</volume>
      <issue>2</issue>
      <elocation-id>e19273</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>5</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Emily Chen, Kristina Lerman, Emilio Ferrara. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 29.05.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://publichealth.jmir.org/2020/2/e19273/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>At the time of this writing, the coronavirus disease (COVID-19) pandemic outbreak has already put tremendous strain on many countries' citizens, resources, and economies around the world. Social distancing measures, travel bans, self-quarantines, and business closures are changing the very fabric of societies worldwide. With people forced out of public spaces, much of the conversation about these phenomena now occurs online on social media platforms like Twitter.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this paper, we describe a multilingual COVID-19 Twitter data set that we are making available to the research community via our COVID-19-TweetIDs GitHub repository.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We started this ongoing data collection on January 28, 2020, leveraging Twitter’s streaming application programming interface (API) and Tweepy to follow certain keywords and accounts that were trending at the time data collection began. We used Twitter’s search API to query for past tweets, resulting in the earliest tweets in our collection dating back to January 21, 2020.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Since the inception of our collection, we have actively maintained and updated our GitHub repository on a weekly basis. We have published over 123 million tweets, with over 60% of the tweets in English. This paper also presents basic statistics that show that Twitter activity responds and reacts to COVID-19-related events.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>It is our hope that our contribution will enable the study of online conversation dynamics in the context of a planetary-scale epidemic outbreak of unprecedented proportions and implications. This data set could also help track COVID-19-related misinformation and unverified rumors or enable the understanding of fear and panic—and undoubtedly more.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>SARS-CoV-2</kwd>
        <kwd>social media</kwd>
        <kwd>network analysis</kwd>
        <kwd>computational social sciences</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The first cases of coronavirus disease (officially named COVID-19 by the World Health Organization [WHO] on February 11, 2020) were reported in Wuhan, China, in late December 2019; the first fatalities were reported in early 2020 [<xref ref-type="bibr" rid="ref1">1</xref>]. The fast-rising infections and death toll led the Chinese government to quarantine the city of Wuhan on January 23, 2020 [<xref ref-type="bibr" rid="ref1">1</xref>]. During this period, other countries began reporting their first confirmed cases of the disease, and on January 30, 2020, the WHO announced a Public Health Emergency of International Concern. With more countries reporting cases of the disease, and infections rapidly escalating in some regions of the world, including South Korea, Iran, and Italy, the WHO declared COVID-19 a pandemic [<xref ref-type="bibr" rid="ref2">2</xref>]. At the time of this writing, COVID-19 has been reported in 185 countries, leaving governments all over the world scrambling for ways to contain the disease and lessen its adverse consequences to their people's health and economy [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>Preventative measures implemented by national, state, and local governments now affect the daily routines of millions of people worldwide [<xref ref-type="bibr" rid="ref4">4</xref>]. <italic>Social distancing</italic>, the most widely used of such measures, aims to curtail new infections by reducing physical contact between people [<xref ref-type="bibr" rid="ref5">5</xref>]. Social distancing measures have led to the cancellation of sporting events and conferences [<xref ref-type="bibr" rid="ref6">6</xref>] and closures of schools and colleges [<xref ref-type="bibr" rid="ref7">7</xref>], and have forced many businesses to require their employees to work from home [<xref ref-type="bibr" rid="ref8">8</xref>]. As more and more social interactions move online, the conversation around COVID-19 has continued to expand, with growing numbers turning to social media for both information and company [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Platforms such as Twitter have become central to the technological and social infrastructure that allows us to stay connected even during crises.</p>
      <p>We describe a Twitter data set about COVID-19-related online conversations that we are sharing with the research community. People all over the world take to Twitter to express opinions and engage in dialogue in a public forum, and Twitter, with its open application programming interface (API), has proven to be an invaluable resource for studying a wide range of topics. Twitter has long been used by the research community as a means to understand dynamics observable in online social networks, from information dissemination [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>] to the prevalence and influence of bots and misinformation [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. More importantly during the current COVID-19 pandemic, Twitter provides researchers the ability to study the role social media plays in the global health crisis [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. We hope that this data will spur new research about the social dimensions of the pandemic.</p>
      <p>We began collecting data in real time from Twitter, with the earliest tweets dating to January 21, 2020, by tracking COVID-19-related keywords and accounts. Here, we describe the data collection methods, document initial data statistics, and provide information about how to obtain and use the data.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>We have been actively collecting tweets since January 28, 2020, leveraging Twitter's streaming API [<xref ref-type="bibr" rid="ref20">20</xref>] and Tweepy [<xref ref-type="bibr" rid="ref21">21</xref>] to follow specific keywords and accounts that were trending at the time. When we started collecting tweets, we also used Twitter's search API [<xref ref-type="bibr" rid="ref22">22</xref>] on the same keywords to gather related historical tweets. Thus, the earliest tweets in our collection date back to January 21, 2020. Since then, we have incrementally added keywords and accounts to follow based on the conversations occurring on Twitter at any time. We have collected over 72 million tweets from inception to March 21, 2020, constituting roughly 600 GB of raw data, and are still collecting data to this day.</p>
        <p>Our collection relies upon publicly available data and is hence registered as IRB (institutional review board) exempt by the University of Southern California IRB (approved protocol UP-17-00610). We release the data set with the stipulation that those who use it must comply with Twitter’s Terms and Conditions [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
      <sec>
        <title>Tracked Keywords and Accounts</title>
        <p>By continuously monitoring Twitter's trending topics, keywords, and sources associated with COVID-19, we did our best to capture conversations related to the outbreak.</p>
        <p>Twitter's streaming API returns any tweet containing the keyword(s) in the text of the tweet, as well as in its metadata; therefore, it is not always necessary to have each permutation of a specific keyword in the tracking list. For example, the keyword “Covid” will return tweets that contain either “Covid19” or “Covid-19.” We list a subset of the keywords and accounts that we are following in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>, respectively, along with the date we began tracking them. There are some keywords that overlap due to an included keyword being a substring of another, but we included both for good measure. The keyword choices in the current data set are all in English, so there is a heavy bias toward English tweets and events related to English-speaking countries. Due to the evolving nature of the pandemic and online conversations, these tables will expand as we continue to monitor Twitter for additional keywords and accounts to add to our tracking list.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>A sample of the keywords that we are actively tracking in our Twitter collection; see the GitHub repository for a full list of all tracked keywords (v1.8—May 8, 2020) [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="790"/>
            <thead>
              <tr valign="top">
                <td>Tracked since</td>
                <td>Keyword</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1/21/2020</td>
                <td>Coronavirus; Corona; CDC; Ncov; Wuhan; Outbreak; China</td>
              </tr>
              <tr valign="top">
                <td>1/22/2020</td>
                <td>Koronavirus; Wuhancoronavirus; Wuhanlockdown; N95; Kungflu; Epidemic; Sinophobia</td>
              </tr>
              <tr valign="top">
                <td>2/16/2020</td>
                <td>Covid-19</td>
              </tr>
              <tr valign="top">
                <td>3/2/2020</td>
                <td>Corona virus</td>
              </tr>
              <tr valign="top">
                <td>3/6/2020</td>
                <td>Covid19; Sars-cov-2</td>
              </tr>
              <tr valign="top">
                <td>3/8/2020</td>
                <td>COVID–19</td>
              </tr>
              <tr valign="top">
                <td>3/12/2020</td>
                <td>COVD; Pandemic</td>
              </tr>
              <tr valign="top">
                <td>3/13/2020</td>
                <td>Coronapocalypse; CancelEverything; Coronials; SocialDistancing</td>
              </tr>
              <tr valign="top">
                <td>3/14/2020</td>
                <td>Panic buying; DuringMy14DayQuarantine; Panic shopping; InMyQuarantineSurvivalKit</td>
              </tr>
              <tr valign="top">
                <td>3/16/2020</td>
                <td>chinese virus; stayhomechallenge; DontBeASpreader; lockdown</td>
              </tr>
              <tr valign="top">
                <td>3/18/2020</td>
                <td>shelteringinplace; staysafestayhome; trumppandemic; flatten the curve</td>
              </tr>
              <tr valign="top">
                <td>3/19/2020</td>
                <td>PPEshortage; saferathome; stayathome</td>
              </tr>
              <tr valign="top">
                <td>3/21/2020</td>
                <td>GetMePPE</td>
              </tr>
              <tr valign="top">
                <td>3/26/2020</td>
                <td>covidiot</td>
              </tr>
              <tr valign="top">
                <td>3/28/2020</td>
                <td>epitwitter</td>
              </tr>
              <tr valign="top">
                <td>3/31/2020</td>
                <td>Pandemie</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Account names that we are actively tracking in our Twitter collection (v1.8—May 8, 2020).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="790"/>
            <thead>
              <tr valign="top">
                <td>Tracked since</td>
                <td>Account name</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1/22/2020</td>
                <td>PneumoniaWuhan; CoronaVirusInfo; V2019N; CDCemergency; CDCgov; WHO; HHSGov; NIAIDNews</td>
              </tr>
              <tr valign="top">
                <td>3/15/2020</td>
                <td>DrTedros</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Releases</title>
        <p>Our data collection will continue uninterrupted for the foreseeable future. As the pandemic continues to run its course, we anticipate that the amount of data will grow significantly. The data set is available on GitHub [<xref ref-type="bibr" rid="ref24">24</xref>] and is released in compliance with Twitter's Terms and Conditions, under which we are unable to publicly release the text of the collected tweets. We are, therefore, releasing the Tweet IDs, which are unique identifiers tied to specific tweets. The Tweet IDs can be used by researchers to query Twitter’s API and obtain the complete tweet object, including tweet content (text, URLs, hashtags, etc) and authors’ metadata. This process to retrieve the full tweet object from Twitter starting from a Tweet ID is referred to as <italic>hydration</italic>. There are several easy-to-use tools that have been developed for such purposes, including the <italic>Hydrator</italic> [<xref ref-type="bibr" rid="ref25">25</xref>] and <italic>Twarc</italic> [<xref ref-type="bibr" rid="ref26">26</xref>], but one could also directly use Twitter’s API to retrieve the desired data. This data set can also be found on Harvard Dataverse [<xref ref-type="bibr" rid="ref27">27</xref>]. <xref ref-type="table" rid="table3">Table 3</xref> displays basic statistics, including collection period and number of tweets in that respective release, for all current releases (as of May 15, 2020).</p>
        <p>There are a few known gaps in the data, which are listed in <xref ref-type="table" rid="table4">Table 4</xref>. Due to Twitter API restrictions on free data access, we were unable to recover data from the listed times, as Twitter only provides free access to tweets returned from their streaming API from the past week. To request access, interested researchers will need to agree upon the terms of usage dictated by the chosen license.</p>
        <p>All of the Tweet ID files are stored in folders that indicate the year and month the tweet was posted (YEAR-MONTH). The individual Tweet ID files each contain a collection of Tweet IDs, with the file names all beginning with the prefix “coronavirus-tweet-id-” followed by the year, month, date, and hour the tweet was posted (YEAR-MONTH-DATE-HOUR).</p>
        <p>We note that if a tweet has been removed from the platform, researchers will not be able to obtain the original tweet.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>List of all releases and their statistics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="180"/>
            <col width="340"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Release version</td>
                <td>Release date</td>
                <td>Data collection period</td>
                <td>Tweets, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>v1.0</td>
                <td>3/17/2020</td>
                <td>3/05/2020 - 3/12/2020</td>
                <td>8,919,411</td>
              </tr>
              <tr valign="top">
                <td>v1.1</td>
                <td>3/23/2020</td>
                <td>1/21/2020 - 3/12/2020</td>
                <td>63,616,072</td>
              </tr>
              <tr valign="top">
                <td>v1.2</td>
                <td>3/31/2020</td>
                <td>1/21/2020 - 3/21/2020</td>
                <td>72,403,796</td>
              </tr>
              <tr valign="top">
                <td>v1.3</td>
                <td>4/11/2020</td>
                <td>1/21/2020 - 4/03/2020</td>
                <td>87,209,465</td>
              </tr>
              <tr valign="top">
                <td>v1.4</td>
                <td>4/13/2020</td>
                <td>1/21/2020 - 4/10/2020</td>
                <td>94,671,486</td>
              </tr>
              <tr valign="top">
                <td>v1.5</td>
                <td>4/20/2020</td>
                <td>1/21/2020 - 4/17/2020</td>
                <td>101,771,227</td>
              </tr>
              <tr valign="top">
                <td>v1.6</td>
                <td>4/26/2020</td>
                <td>1/21/2020 - 4/24/2020</td>
                <td>109,013,655</td>
              </tr>
              <tr valign="top">
                <td>v1.7</td>
                <td>5/04/2020</td>
                <td>1/21/2020 - 5/01/2020</td>
                <td>115,929,358</td>
              </tr>
              <tr valign="top">
                <td>v1.8</td>
                <td>5/11/2020</td>
                <td>1/21/2020 - 5/08/2020</td>
                <td>123,113,914</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Known gaps in the data set in UTC (v1.8—May 8, 2020).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="790"/>
            <thead>
              <tr valign="top">
                <td>Date</td>
                <td>Time</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>2/1/2020</td>
                <td>4:00 - 9:00 UTC</td>
              </tr>
              <tr valign="top">
                <td>2/8/2020</td>
                <td>6:00 - 7:00 UTC</td>
              </tr>
              <tr valign="top">
                <td>2/22/2020</td>
                <td>21:00 - 24:00 UTC</td>
              </tr>
              <tr valign="top">
                <td>2/23/2020</td>
                <td>0:00 - 24:00 UTC</td>
              </tr>
              <tr valign="top">
                <td>2/24/2020</td>
                <td>0:00 - 4:00 UTC</td>
              </tr>
              <tr valign="top">
                <td>2/25/2020</td>
                <td>0:00 - 3:00 UTC</td>
              </tr>
              <tr valign="top">
                <td>3/2/2020</td>
                <td>Intermittent internet connectivity issues</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <sec>
          <title>The Most Recent Release (Release v1.8—May 11, 2020)</title>
          <p>Our 9th release spans January 21, 2020, through May 8, 2020. The data set available now contains tweets from January 21, 2020 (22:00 UTC), through May 8, 2020 (21:00 UTC), with 123,113,914 tweets. The language breakdown of the tweets can be found in <xref ref-type="table" rid="table5">Table 5</xref>. A subset of the keywords and accounts that were followed during this timeframe can be identified by referencing <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>. For a full and up-to-date list of the keywords we are tracking, please see the “keywords.txt” file in the GitHub repository (a list of the accounts we are tracking can be found in the “accounts.txt” file) [<xref ref-type="bibr" rid="ref24">24</xref>]. Some of the keywords may appear earlier than the initial listed track date in <xref ref-type="table" rid="table1">Table 1</xref>, as we systematically ran the same keywords through Twitter's search API to collect past instances of the keywords shortly after adding the keywords to be tracked in real time.</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Breakdown of the most popular languages and the number of associated tweets (v1.8—May 8, 2020).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="330"/>
              <col width="260"/>
              <col width="410"/>
              <thead>
                <tr valign="top">
                  <td>Language</td>
                  <td>ISO<sup>a</sup></td>
                  <td>Tweets (N=123,113,914), n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>English</td>
                  <td>en</td>
                  <td>80,698,556 (65.55)</td>
                </tr>
                <tr valign="top">
                  <td>Spanish</td>
                  <td>es</td>
                  <td>13,848,449 (11.25)</td>
                </tr>
                <tr valign="top">
                  <td>Indonesian</td>
                  <td>in</td>
                  <td>4,196,591 (3.41)</td>
                </tr>
                <tr valign="top">
                  <td>French</td>
                  <td>fr</td>
                  <td>3,762,601 (3.06)</td>
                </tr>
                <tr valign="top">
                  <td>Portuguese</td>
                  <td>pt</td>
                  <td>3,451,196 (2.80)</td>
                </tr>
                <tr valign="top">
                  <td>Japanese</td>
                  <td>ja</td>
                  <td>2,897,046 (2.35)</td>
                </tr>
                <tr valign="top">
                  <td>Thai</td>
                  <td>th</td>
                  <td>2,754,627 (2.24)</td>
                </tr>
                <tr valign="top">
                  <td>(undefined)</td>
                  <td>und</td>
                  <td>2,711,649 (2.20)</td>
                </tr>
                <tr valign="top">
                  <td>Italian</td>
                  <td>it</td>
                  <td>1,615,916 (1.31)</td>
                </tr>
                <tr valign="top">
                  <td>Turkish</td>
                  <td>tr</td>
                  <td>1,308,989 (1.06)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table5fn1">
                <p><sup>a</sup>ISO: International Organization for Standardization.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>General Release Notes</title>
          <p>In order to use any Twitter-facing libraries, including hydration software, users must first apply for a Twitter developer account and obtain the necessary authentication tokens [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
          <p>The GitHub community has also generously contributed scripts to enable researchers to hydrate the Tweet IDs using <italic>Twarc</italic> [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overview</title>
        <p>We present an initial analysis of our collected data set that verifies that Twitter discourse statistics reflect major events at the time, and we leverage timelines released by Business Insider [<xref ref-type="bibr" rid="ref29">29</xref>], NBC [<xref ref-type="bibr" rid="ref30">30</xref>], and CNN [<xref ref-type="bibr" rid="ref31">31</xref>] to identify these events of interest during the development of the COVID-19 pandemic. In some of these analyses, there is a dip on March 2, 2020—this was due to internet connectivity failures throughout that specific day. Our discussion is based on analysis done on tweets from release v1.2 (tweets collected from January 21, 2020, to March 21, 2020; released March 31, 2020), while the most recent release is v1.8.</p>
      </sec>
      <sec>
        <title>Hashtags</title>
        <p>We tracked the frequency of COVID-19-related hashtags, specifically those that contain the substrings “wuhan,” “coronavirus,” and “covid” throughout our collection period (<xref rid="figure1" ref-type="fig">Figure 1</xref>). We can see that while hashtags with the substring “coronavirus” consistently remain a more heavily used hashtag in our data set, the hashtag usage spiked on the day the WHO declared COVID-19 a global public health emergency; it also spiked on the day the United States announced the first COVID-19-related death [<xref ref-type="bibr" rid="ref2">2</xref>]. We also did not see hashtags referencing “covid” being used until February 11, 2020, when the WHO announced “COVID-19” as the official name for the novel coronavirus disease. The keyword “wuhan” in hashtags experienced consistent usage until late February, then steadily declined, which reflects the decrease in cases in China and the global spread of the virus.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Usage of hashtags containing the substrings “wuhan,” “covid,” and “coronavirus” over time. COVID-19: coronavirus disease; WHO: World Health Organization.</p>
          </caption>
          <graphic xlink:href="publichealth_v6i2e19273_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Languages</title>
        <p>We then examined the percentage of total tweets posted in different languages (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Although English is the most prominent language in our data set, we excluded English from this analysis to better visualize tweet activity in countries that experienced COVID-19 outbreaks earlier in the timeline. In particular, we found that Japanese tweet activity increased steadily after the cruise ship Diamond Princess was quarantined off the coast of Yokohama, Japan, with a peak around the time when passengers began to disembark [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
        <p>There was also a significant spike in Italian tweets when the first case related to COVID-19 was reported in Lodi, Italy, and the first death was seen in Veneto [<xref ref-type="bibr" rid="ref33">33</xref>]. We also observed a peak in the percentage of Spanish tweets after the first COVID-19 case in Spain was announced on February 1, 2020 [<xref ref-type="bibr" rid="ref34">34</xref>] and a steady increase in the percentage of Spanish tweets after reports of the first COVID-19-related death began to emerge (the death itself occurred on February 13, 2020, but the cause was diagnosed postmortem) [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Tweets in Spanish, Italian, and Japanese over time (our multilingual database began data collection after January 28, 2020).</p>
          </caption>
          <graphic xlink:href="publichealth_v6i2e19273_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Verified Users</title>
        <p>Verified users on Twitter have been identified by Twitter as accounts of public interest and are verified to be authentic accounts [<xref ref-type="bibr" rid="ref36">36</xref>]. We observed that the verified accounts, which include news sources and political figures, are the most active when major events occur, as seen in <xref rid="figure3" ref-type="fig">Figure 3</xref>. This is to be expected since influential figures and news sources often weigh in and report on breaking news in real time using Twitter as a platform to amplify their messaging. As the United States also drives much of the discourse on Twitter, it is therefore unsurprising that there is a major spike in activity from verified users when the country experienced its first COVID-19-related death.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Number of tweets from verified users over time. COVID-19: coronavirus disease; WHO: World Health Organization.</p>
          </caption>
          <graphic xlink:href="publichealth_v6i2e19273_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are several limitations to our data set. We collect our data using Twitter’s free streaming API, which only returns 1% of the total Twitter volume, and the volume of tweets we collect continues to be dependent on our filter endpoint and network connection [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>While our data set is a multilingual data set, containing tweets in over 67 languages, the keywords and accounts we have been tracking and continue to track have been mostly English keywords and accounts. Thus, there is a significant bias in favor of English tweets in our data set over tweets in other languages.</p>
        <p>Despite these limitations, our data collection gathers over 1 million tweets a day from the 1% of tweets available to us through Twitter’s API, and our data set contains on average 35% non-English tweets. Our collection begins in late January 2020, capturing tweets during many major developments, and we plan to continue collecting tweets for the foreseeable future.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">COVID-19</term>
          <def>
            <p>coronavirus disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">IRB</term>
          <def>
            <p>institutional review board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors gratefully acknowledge support from the Defense Advanced Research Projects Agency (DARPA); contract #W911NF-17-C-0094.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>EC was responsible for data curation. All authors contributed to the writing of this manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>NY Times</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>A Timeline of the Coronavirus Pandemic<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/article/coronavirus-timeline.html">https://www.nytimes.com/article/coronavirus-timeline.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <source>World Health Organization</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Rolling updates on a coronavirus disease (COVID-19)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/emergencies/diseases/novel-coronavirus-2019/events-as-they-happen">https://www.who.int/emergencies/diseases/novel-coronavirus-2019/events-as-they-happen</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>An interactive web-based dashboard to track COVID-19 in real time</article-title>
          <source>Lancet Infect Dis</source>
          <year>2020</year>
          <month>05</month>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>533</fpage>
          <lpage>534</lpage>
          <pub-id pub-id-type="doi">10.1016/s1473-3099(20)30120-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khurana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Siemaszko</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>DeJesus-Banos</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>NBC News</source>
          <year>2020</year>
          <access-date>2020-05-15</access-date>
          <comment>Stay-at-home orders across the country<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nbcnews.com/health/health-news/here-are-stay-home-orders-across-country-n1168736">https://www.nbcnews.com/health/health-news/here-are-stay-home-orders-across-country-n1168736</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <source>Centers for Disease Control and Prevention</source>
          <year>2020</year>
          <access-date>2020-05-15</access-date>
          <comment>Social Distancing<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/coronavirus/2019-ncov/prevent-getting-sick/social-distancing.html">https://www.cdc.gov/coronavirus/2019-ncov/prevent-getting-sick/social-distancing.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hadden</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Casado</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <source>Business Insider</source>
          <access-date>2020-05-15</access-date>
          <comment>Here are the latest major events that have been canceled or postponed because of the coronavirus outbreak, including the 2020 Tokyo Olympics, Burning Man, and the 74th Annual Tony Awards<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.businessinsider.com/major-events-cancelled-or-postponed-due-to-the-coronavirus-2020">https://www.businessinsider.com/major-events-cancelled-or-postponed-due-to-the-coronavirus-2020</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chavez</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Moshtaghian</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>CNN</source>
          <year>2020</year>
          <access-date>2020-05-15</access-date>
          <comment>48 states have ordered or recommended that schools don’t reopen this academic year<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cnn.com/2020/04/18/us/schools-closed-coronavirus/index.html">https://www.cnn.com/2020/04/18/us/schools-closed-coronavirus/index.html</ext-link> </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hadden</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Casado</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sonnemaker</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <source>Business Insider</source>
          <year>2020</year>
          <access-date>2020-05-15</access-date>
          <comment>Apple, Google, and Amazon are among the largest global companies who have restricted travel or asked their employees to work remotely as a precaution against the novel coronavirus. Here's the full list<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.businessinsider.com/companies-asking-employees-to-work-from-home-due-to-coronavirus-2020">https://www.businessinsider.com/companies-asking-employees-to-work-from-home-due-to-coronavirus-2020</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abbas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Eliyana</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ekowati</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Saud</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Raza</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wardani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Data set on coping strategies in the digital age: The role of psychological well-being and social capital among university students in Java Timor, Surabaya, Indonesia</article-title>
          <source>Data Brief</source>
          <year>2020</year>
          <month>06</month>
          <volume>30</volume>
          <fpage>105583</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2352-3409(20)30477-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.dib.2020.105583</pub-id>
          <pub-id pub-id-type="medline">32368599</pub-id>
          <pub-id pub-id-type="pii">S2352-3409(20)30477-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC7184248</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fischer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>AXIOS</source>
          <year>2020</year>
          <access-date>2020-05-15</access-date>
          <comment>Social media use spikes during pandemic<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.axios.com/social-media-overuse-spikes-in-coronavirus-pandemic-764b384d-a0ee-4787-bd19-7e7297f6d6ec.html">https://www.axios.com/social-media-overuse-spikes-in-coronavirus-pandemic-764b384d-a0ee-4787-bd19-7e7297f6d6ec.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Information contagion: An empirical study of the spread of news on digg and twitter social networks</article-title>
          <year>2010</year>
          <conf-name>Fourth International AAAI Conference on Weblogs and Social Media</conf-name>
          <conf-date>2010</conf-date>
          <conf-loc>Washington, DC</conf-loc>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>AAAI Publications</publisher-name>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1003.2664"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Romero</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Meeder</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kleinberg</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Differences in the Mechanics of Information Diffusion across Topics: Idioms, Political Hashtags, Complex Contagion on Twitter</article-title>
          <year>2011</year>
          <conf-name>The 20th International Conference on World Wide Web WWW '11</conf-name>
          <conf-date>2011</conf-date>
          <conf-loc>New York, NY, USA</conf-loc>
          <publisher-loc>New York, NY, USA</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>695</fpage>
          <pub-id pub-id-type="doi">10.1145/1963405.1963503</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castillo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mendoza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Poblete</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Information Credibility on Twitter</article-title>
          <year>2011</year>
          <conf-name>The 20th International Conference on World Wide Web WWW '11</conf-name>
          <conf-date>2011</conf-date>
          <conf-loc>New York, NY, USA</conf-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>675</fpage>
          <lpage>684</lpage>
          <pub-id pub-id-type="doi">10.1145/1963405.1963500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Varol</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Menczer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Flammini</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The rise of social bots</article-title>
          <source>Commun ACM</source>
          <year>2016</year>
          <month>06</month>
          <day>24</day>
          <volume>59</volume>
          <issue>7</issue>
          <fpage>96</fpage>
          <lpage>104</lpage>
          <pub-id pub-id-type="doi">10.1145/2818717</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>IC</given-names>
            </name>
            <name name-style="western">
              <surname>Tse</surname>
              <given-names>ZTH</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pechta</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marquez-Lameda</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Meltzer</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Lubell</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>How did Ebola information spread on twitter: broadcasting or viral spreading?</article-title>
          <source>BMC Public Health</source>
          <year>2019</year>
          <month>04</month>
          <day>25</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>438</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-019-6747-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-019-6747-8</pub-id>
          <pub-id pub-id-type="medline">31023299</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-019-6747-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6485141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
          <source>PLoS One</source>
          <year>2010</year>
          <month>11</month>
          <day>29</day>
          <volume>5</volume>
          <issue>11</issue>
          <fpage>e14118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0014118"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
          <pub-id pub-id-type="medline">21124761</pub-id>
          <pub-id pub-id-type="pmcid">PMC2993925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>#COVID-19 on Twitter: Bots, Conspiracies, and Social Media Activism (arXiv:2004.09531)</article-title>
          <source>arXiv.org</source>
          <year>2020</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.09531"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chong</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Conversations and Medical News Frames on Twitter: Infodemiological Study on COVID-19 in South Korea</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>05</month>
          <day>05</day>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>e18897</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/5/e18897/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18897</pub-id>
          <pub-id pub-id-type="medline">32325426</pub-id>
          <pub-id pub-id-type="pii">v22i5e18897</pub-id>
          <pub-id pub-id-type="pmcid">PMC7202309</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abd-Alrazaq</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alhuwail</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Househ</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Top Concerns of Tweeters During the COVID-19 Pandemic: Infoveillance Study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e19016</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e19016/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19016</pub-id>
          <pub-id pub-id-type="medline">32287039</pub-id>
          <pub-id pub-id-type="pii">v22i4e19016</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <source>Twitter</source>
          <access-date>2020-04-10</access-date>
          <comment>Consuming streaming data<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en/docs/tutorials/consuming-streaming-data">https://developer.twitter.com/en/docs/tutorials/consuming-streaming-data</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <source>Tweepy</source>
          <access-date>2020-04-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tweepy.org/">https://www.tweepy.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <source>Twitter</source>
          <access-date>2020-04-10</access-date>
          <comment>Search Tweets<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets">https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <source>Twitter</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Developer Agreement and Policy<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en/developer-terms/agreement-and-policy">https://developer.twitter.com/en/developer-terms/agreement-and-policy</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>GitHub</source>
          <year>2020</year>
          <access-date>2020-05-15</access-date>
          <comment>COVID-19-TweetIDs<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/echen102/COVID-19-TweetIDs">https://github.com/echen102/COVID-19-TweetIDs</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>DocNow</collab>
          </person-group>
          <source>GitHub</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Hydrator<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/DocNow/hydrator">https://github.com/DocNow/hydrator</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>DocNow</collab>
          </person-group>
          <source>GitHub</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Twarc<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/DocNow/twarc">https://github.com/DocNow/twarc</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 TweetIDs</article-title>
          <source>Harvard Dataverse</source>
          <year>2020</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.7910/DVN/DKOVLA"/>
          </comment>
          <pub-id pub-id-type="doi">10.7910/DVN/DKOVLA</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <source>Twitter</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Developers<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en">https://developer.twitter.com/en</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Secon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Woodward</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mosher</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Business Insider</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>A comprehensive timeline of the new coronavirus pandemic, from China's first COVID-19 case to the present<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.businessinsider.com/coronavirus-pandemic-timeline-history-major-events-2020-3">https://www.businessinsider.com/coronavirus-pandemic-timeline-history-major-events-2020-3</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Muccari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>NBC News</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Coronavirus timeline: Tracking the critical moments of COVID-19<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nbcnews.com/health/health-news/coronavirus-timeline-tracking-critical-moments-covid-19-n1154341">https://www.nbcnews.com/health/health-news/coronavirus-timeline-tracking-critical-moments-covid-19-n1154341</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>CNN Editorial Research</collab>
          </person-group>
          <source>CNN</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Coronavirus Outbreak Timeline Fast Facts<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cnn.com/2020/02/06/health/wuhan-coronavirus-timeline-fast-facts/index.html">https://www.cnn.com/2020/02/06/health/wuhan-coronavirus-timeline-fast-facts/index.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Helsel</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yamamoto</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>NBC News</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>10 coronavirus cases confirmed from cruise ship quarantined in Japan<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nbcnews.com/news/world/10-coronavirus-cases-confirmed-cruise-ship-quarantined-japan-n1130296">https://www.nbcnews.com/news/world/10-coronavirus-cases-confirmed-cruise-ship-quarantined-japan-n1130296</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Popovich</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>NY Times</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Italy's Virus Shutdown Came Too Late. What Happens Now?<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/interactive/2020/04/05/world/europe/italy-coronavirus-lockdown-reopen.html">https://www.nytimes.com/interactive/2020/04/05/world/europe/italy-coronavirus-lockdown-reopen.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kottasová</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <source>CNN</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>February 1 coronavirus news<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cnn.com/asia/live-news/coronavirus-outbreak-02-01-20-intl-hnk/h_afcf3a4665521aab11c721c8cc80dd03">https://www.cnn.com/asia/live-news/coronavirus-outbreak-02-01-20-intl-hnk/h_afcf3a4665521aab11c721c8cc80dd03</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Heinrich</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Reuters</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>Spain reports first coronavirus death in Valencia<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.reuters.com/article/us-health-coronavirus-spain-death/spain-confirms-countrys-first-death-from-coronavirus-health-official-idUSKBN20Q2TG">https://www.reuters.com/article/us-health-coronavirus-spain-death/spain-confirms-countrys-first-death-from-coronavirus-health-official-idUSKBN20Q2TG</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>Twitter</source>
          <year>2020</year>
          <access-date>2020-04-10</access-date>
          <comment>About verified accounts<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://help.twitter.com/en/managing-your-account/about-twitter-verified-accounts">https://help.twitter.com/en/managing-your-account/about-twitter-verified-accounts</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morstatter</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pfeffer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Carley</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Is the Sample Good Enough? Comparing Data from Twitter's Streaming API with Twitter's Firehose</article-title>
          <source>arXiv.org</source>
          <year>2013</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1306.5204"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
