<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i11e30642</article-id>
      <article-id pub-id-type="pmid">34653016</article-id>
      <article-id pub-id-type="doi">10.2196/30642</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>COVID-19 Vaccine Hesitancy on Social Media: Building a Public Twitter Data Set of Antivaccine Content, Vaccine Misinformation, and Conspiracies</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Sanchez</surname>
            <given-names>Travis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>DeVerna</surname>
            <given-names>Matthew</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ramachandran</surname>
            <given-names>Anandhi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Das</surname>
            <given-names>Manoja</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sakar</surname>
            <given-names>Urmimala</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Muric</surname>
            <given-names>Goran</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Information Sciences Institute</institution>
            <institution>University of Southern California</institution>
            <addr-line>4676 Admiralty Way</addr-line>
            <addr-line>Suite 1001</addr-line>
            <addr-line>Marina del Rey, CA, 90292</addr-line>
            <country>United States</country>
            <phone>1 213 740 2467</phone>
            <email>gmuric@isi.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3700-2347</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Yusong</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6692-3607</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ferrara</surname>
            <given-names>Emilio</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1942-2831</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Information Sciences Institute</institution>
        <institution>University of Southern California</institution>
        <addr-line>Marina del Rey, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Southern California</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Annenberg School for Communication and Journalism</institution>
        <institution>University of Southern California</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Goran Muric <email>gmuric@isi.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>7</volume>
      <issue>11</issue>
      <elocation-id>e30642</elocation-id>
      <history>
        <date date-type="received">
          <day>23</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>8</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>26</day>
          <month>8</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>10</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Goran Muric, Yusong Wu, Emilio Ferrara. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 17.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2021/11/e30642" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>False claims about COVID-19 vaccines can undermine public trust in ongoing vaccination campaigns, posing a threat to global public health. Misinformation originating from various sources has been spreading on the web since the beginning of the COVID-19 pandemic. Antivaccine activists have also begun to use platforms such as Twitter to promote their views. To properly understand the phenomenon of vaccine hesitancy through the lens of social media, it is of great importance to gather the relevant data.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this paper, we describe a data set of Twitter posts and Twitter accounts that publicly exhibit a strong antivaccine stance. The data set is made available to the research community via our AvaxTweets data set GitHub repository. We characterize the collected accounts in terms of prominent hashtags, shared news sources, and most likely political leaning.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We started the ongoing data collection on October 18, 2020, leveraging the Twitter streaming application programming interface (API) to follow a set of specific antivaccine-related keywords. Then, we collected the historical tweets of the set of accounts that engaged in spreading antivaccination narratives between October 2020 and December 2020, leveraging the Academic Track Twitter API. The political leaning of the accounts was estimated by measuring the political bias of the media outlets they shared.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We gathered two curated Twitter data collections and made them publicly available: (1) a streaming keyword–centered data collection with more than 1.8 million tweets, and (2) a historical account–level data collection with more than 135 million tweets. The accounts engaged in the antivaccination narratives lean toward the right (conservative) end of the political spectrum. Vaccine hesitancy is fueled by misinformation originating from websites with already questionable credibility.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Vaccine-related misinformation on social media may exacerbate the levels of vaccine hesitancy, hampering progress toward vaccine-induced herd immunity, and could potentially increase the number of infections related to new COVID-19 variants. For these reasons, understanding vaccine hesitancy through the lens of social media is of paramount importance. Because data access is the first obstacle to attaining this goal, we published a data set that can be used to study antivaccine misinformation on social media and to enable a better understanding of vaccine hesitancy.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>vaccine hesitancy</kwd>
        <kwd>COVID-19 vaccines</kwd>
        <kwd>dataset</kwd>
        <kwd>COVID-19</kwd>
        <kwd>SARS-CoV-2</kwd>
        <kwd>social media</kwd>
        <kwd>network analysis</kwd>
        <kwd>hesitancy</kwd>
        <kwd>vaccine</kwd>
        <kwd>Twitter</kwd>
        <kwd>misinformation</kwd>
        <kwd>conspiracy</kwd>
        <kwd>trust</kwd>
        <kwd>public health</kwd>
        <kwd>utilization</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The opposition to vaccination dates back to the 1800s, immediately after the English physician Edward Jenner created the first vaccine in human history. The opponents of the vaccine were vocal and could be found in all segments of society: religious communities protested the unnaturalness of using animal infection in humans, parents were concerned about the invasiveness of the procedure, and vaccinated people were often illustrated with a cow’s head growing from their neck [<xref ref-type="bibr" rid="ref1">1</xref>]. Although vaccination is an effective way to prevent diseases such as diphtheria, tetanus, pertussis, influenza, and measles, almost 1 in 5 children still do not receive routine lifesaving immunizations, and an estimated 1.5 million children still die each year of diseases that could be prevented by vaccines that already exist [<xref ref-type="bibr" rid="ref2">2</xref>]. These fatalities are not only caused by objective reasons, such as lack of access to vaccines due to poverty, but also by the unwillingness and fear regarding vaccines from the parents of these children. The term “vaccine hesitancy” refers to delay in acceptance or refusal of vaccines despite availability of vaccine services [<xref ref-type="bibr" rid="ref3">3</xref>]. Vaccine hesitancy has emerged as a factor in vaccine delay and refusal for adults. A common example is the annual seasonal influenza vaccine. It has been observed that greater hesitancy, both general and specific to the influenza vaccine, is associated with lower vaccine uptake [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. A variety of factors contribute to vaccine hesitancy, including safety concerns, religious reasons, personal beliefs, philosophical reasons, and desire for additional education [<xref ref-type="bibr" rid="ref6">6</xref>]. During the COVID-19 pandemic, although the inoculation of large populations is increasingly important, antivaccine narratives are spreading rapidly, endangering public health, human lives, and the social order.</p>
      <p>With the rise of social media, the dissemination of information (and hence, potentially, misinformation) has become easier than ever before. Unsurprisingly, antivaccine activists have also begun to use platforms such as Twitter to share their views. As a result, their activism has expanded to include web-based propaganda. Compared with traditional communication channels, social media offers an unprecedented opportunity to spread antivaccination messages and allow communities to form around antivaccine sentiment [<xref ref-type="bibr" rid="ref7">7</xref>]. Social media can amplify the effects of antivaccination misinformation; multiple studies have shown links between susceptibility to misinformation and both vaccine hesitancy and a reduced likelihood of complying with health guidance measures [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. Based on these findings, vaccine-related misinformation on social media may exacerbate the levels of vaccine hesitancy, creating pockets with low vaccination rates in the United States and globally; this can hamper progress toward vaccine-induced herd immunity and can potentially increase the number of infections related to new COVID-19 variants, possibly leading to vaccine-resistant mutations. For these reasons, understanding vaccine hesitancy through the lens of social media is of paramount importance. Because data access is the first obstacle to attaining this goal, to enable the research community, we built and made public a social media data set of antivaccine content, vaccine misinformation, and related conspiracies. Although researchers have been collecting data related to COVID-19 vaccines [<xref ref-type="bibr" rid="ref11">11</xref>], to our knowledge, there are no public data sets focused specifically on the historical activities of antivaccination accounts on Twitter.</p>
      <p>Here, we present a data set that focuses on antivaccine narratives on Twitter. The data set consists of two complementary collections: (1) the <italic>streaming collection</italic> contains tweets collected using the Twitter Streaming application programming interface (API) from a set of antivaccine keywords, and (2) the <italic>account collection</italic> contains historical tweets from approximately 70,000 accounts that engaged in spreading antivaccination narratives. Additionally, we present initial statistical analyses of the data, including the frequencies of hashtags, analysis of the news sources, the most likely political leaning of the accounts, and geographic distribution.</p>
      <p>The published data set includes tweet IDs of publicly available posts, in compliance with the Twitter Terms of Service [<xref ref-type="bibr" rid="ref12">12</xref>]. This collection builds on the previously published data sets by DeVerna et al [<xref ref-type="bibr" rid="ref11">11</xref>], which are focused on general vaccine narratives, and it complements the previous work by Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>] and Lamsal [<xref ref-type="bibr" rid="ref14">14</xref>], who published some of the largest Twitter data sets related to COVID-19 discourse to date. The complete data set in the form of a list of tweet IDs is openly available on GitHub [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Tracked Keywords for the Streaming Collection</title>
        <p>To create a set of keywords that indicate opposition to vaccines, we used a snowballing sampling technique similar to that of DeVerna et al [<xref ref-type="bibr" rid="ref11">11</xref>]. We started from a small set of manually curated keywords used exclusively in the context of strong vaccine hesitancy that appear on Twitter, such as <italic>#vaccineskill</italic> or <italic>#vaccinedamage</italic>. Using the Twitter Streaming API and the set of seed keywords, we collected the data for one day (October 18, 2020), after which we extracted other keywords that co-occurred with the seed keywords. We added the newly collected keywords to the list of seed keywords, checking them manually for relevance. We then repeated this step several times until we exhausted all the significant co-occurrences and narrowed our selection to approximately 60 keywords. The Twitter API can be queried with a substring of a longer keyword, and it will return the tweets that contain the substring. For example, the keyword <italic>novaccine</italic> will return the tweets that contain <italic>novaccineforme</italic>. We attempted to retain only the most informative and relevant stem words to capture most vaccine-related tweets and to avoid collecting less relevant tweets. All keywords used to gather the streaming collection are listed in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Set of keywords used to collect the tweets in the streaming collection.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="700"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Keyword</td>
                <td>Date on which tracking began</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>
                  <italic>abolishbigpharma</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>antivaccine</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>ArrestBillGates</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>betweenmeandmydoctor</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>bigpharmafia</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>bigpharmakills</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>BillGatesBioTerrorist</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>billgatesevil</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>BillGatesIsEvil</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>billgatesisnotadoctor</italic>
                </td>
                <td>12/23/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>billgatesvaccine</italic>
                </td>
                <td>12/14/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>cdcfraud</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>cdctruth</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>cdcwhistleblower</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>covidvaccineispoison</italic>
                </td>
                <td>12/23/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>depopulation</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>DoctorsSpeakUp</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>educateb4uvax</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>exposebillgates</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>forcedvaccines</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>Fuckvaccines</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>idonotconsent</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>informedconsent</italic>
                </td>
                <td>12/14/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>learntherisk</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>medicalfreedom</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>medicalfreedomofchoice</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>momsofunvaccinatedchildren</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>mybodymychoice</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>noforcedflushots</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>NoForcedVaccines</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>notomandatoryvaccines</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>NoVaccine</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>NoVaccineForMe</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>novaccinemandates</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>parentalrights</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>parentsoverpharma</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>saynotovaccines</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>stopmandatoryvaccination</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>syringeslaughter</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>unvaccinated</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>v4vglobaldemo</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccinationchoice</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>VaccineAgenda</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccinedamage</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccinefailure</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccinefraud</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccineharm</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccineinjuries</italic>
                </td>
                <td>12/30/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccineinjury</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>VaccinesAreNotTheAnswer</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccinesarepoison</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccinescause</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaccineskill</italic>
                </td>
                <td>10/19/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>vaxxed</italic>
                </td>
                <td>11/02/2020</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>yeht</italic>
                </td>
                <td>11/02/2020</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Collecting Tweets for Account Collection</title>
        <p>First, we identified a randomly sampled set of approximately 70,000 accounts that appeared in the streaming collection and that engaged in antivaccine rhetoric between October and December 2020, either by tweeting some of the tracked keywords or by retweeting tweets that contained some of the tracked keywords. Then, for those accounts, we collected their historical tweets using the Twitter API. By leveraging Twitter’s Academic Research product track, we were able to access the full archival search and overcome the limit of 3200 historical tweets of the standard API. In this way, we collected almost all the historical tweets of most of the queried accounts.</p>
        <p>Our collection relies upon publicly available data in accordance with the Content Redistribution clause under Twitter’s Developer Agreement and Policy [<xref ref-type="bibr" rid="ref12">12</xref>]. We released the data set with the stipulation that those who use it must comply with Twitter’s Terms and Conditions. The complete data set is publicly available on a GitHub repository and is accessible on the web [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      </sec>
      <sec>
        <title>Calculating the Political Leanings of the Accounts</title>
        <p>We calculated the political leaning of each account by measuring the political bias of the media outlets it shared. We used a methodology proposed in prior work [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], and we identified a set of 90 prominent media outlets and accounts that appeared on Twitter. Each of these outlets and their associated Twitter accounts were placed on a political spectrum (left, lean left, center, lean right, right) per ratings provided by the nonpartisan service AllSides [<xref ref-type="bibr" rid="ref19">19</xref>]. For each account in the data set, we maintained a record of all retweets and the original tweets that contained a domain name affiliated with the selected media outlets. The political bias of each account was calculated as the average political bias of all media outlets it shared content from.</p>
      </sec>
      <sec>
        <title>Identifying Low- and High-Credibility Media Sources</title>
        <p>We leveraged <italic>urllib</italic>, the Python URL handling module, to parse the URLs found in the data set. Each URL was broken into several components, including the addressing scheme, network location, and path. A third-party data set that contains the domains associated with websites that share misinformation was used as a ground truth to tag the domain names [<xref ref-type="bibr" rid="ref20">20</xref>]. For URLs that were not in the data set, we queried the Media Bias/Fact Check website [<xref ref-type="bibr" rid="ref21">21</xref>] for further identification. Because URL shortening services such as Bitly [<xref ref-type="bibr" rid="ref22">22</xref>] are widely used on Twitter, shortened URLs appeared frequently. We used <italic>urlExpander</italic> [<xref ref-type="bibr" rid="ref23">23</xref>] to expand the shortened URLs and retrieve the full URLs where possible. Domain names of popular news aggregators and social networks such as Twitter, Facebook, Instagram, Periscope, and YouTube were ignored in the analysis.</p>
      </sec>
      <sec>
        <title>Generating Geolocation Distribution Maps</title>
        <p>To infer a tweet’s geolocation, we used the information of the self-reported location of the account and matched it to a corresponding state in the United States. To calculate the average activity level per population, the absolute number of tweets was normalized by the 2010 Census-reported population of that state as follows: <italic>I<sub>i</sub></italic> = (<italic>N<sub>i</sub></italic>/<italic>P<sub>i</sub></italic>) × 1,000,000, where <italic>I<sub>i</sub></italic> is the normalized activity level of state <italic>i</italic>, <italic>N<sub>i</sub></italic> is the number of tweets originating in state <italic>i</italic>, and <italic>P<sub>i</sub></italic> is that state’s population in 2010. This normalization provided information on the average number of collected tweets per million inhabitants. Note that we did not generate the geolocation map for the account collection, as it contains a relatively small number of accounts with self-reported locations.</p>
      </sec>
      <sec>
        <title>Topic Network Analysis</title>
        <p>A topic network was constructed to analyze the co-occurrence of hashtags in the streaming data set. Each node in the graph represented a hashtag, and an edge was added if two hashtags occurred in the same tweet. The node size was proportional to its degree centrality, and the edge weight was the number of times two hashtags appeared together. For better visualization, nodes with fewer than 25 neighbors were ignored. To investigate the community structure of the network, we used the Louvain algorithm [<xref ref-type="bibr" rid="ref24">24</xref>] on the topic network, which provided further insights about the links between antivaccine topics.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The primary contribution of this study is the data set that we made publicly available. As of this writing (May 2021), we had collected over 137 million tweets organized in two collections. The streaming collection was gathered using the set of antivaccine keywords in <xref ref-type="table" rid="table1">Table 1</xref>. The account collection, on the other hand, contains the historical activities of accounts prone to spreading antivaccination narratives; thus, it is a significantly larger data set compared to the streaming collection. The basic statistics on the two data sets are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The data set is available on GitHub [<xref ref-type="bibr" rid="ref15">15</xref>] and was released in compliance with the Twitter Terms and Conditions. We are unable to provide the full text of the tweets; therefore, we are releasing the Tweet IDs, which are unique identifiers tied to specific tweets. Researchers can retrieve the full text and the related metadata by querying the Twitter API. Because the streaming data collection is still ongoing, the statistics shown below can vary in future versions of the data set. In the following sections, we will describe the streaming collection and account collection separately.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Basic statistics on tweets collected in the streaming collection and account collection.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="500"/>
          <col width="350"/>
          <col width="150"/>
          <thead>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Streaming collection</td>
              <td>Account collection</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Tweets, n</td>
              <td>1,832,333</td>
              <td>135,949,773</td>
            </tr>
            <tr valign="top">
              <td>Accounts, n</td>
              <td>719,652</td>
              <td>78,954</td>
            </tr>
            <tr valign="top">
              <td>Average number of tweets per account</td>
              <td>2.5</td>
              <td>1721.8</td>
            </tr>
            <tr valign="top">
              <td>Verified accounts, n</td>
              <td>9032</td>
              <td>239</td>
            </tr>
            <tr valign="top">
              <td>Accounts with location, n</td>
              <td>5661</td>
              <td>363</td>
            </tr>
            <tr valign="top">
              <td>Date of oldest tweet</td>
              <td>10/19/2020</td>
              <td>3/6/2007</td>
            </tr>
            <tr valign="top">
              <td>Date of most recent tweet</td>
              <td>4/21/2021</td>
              <td>2/2/2021</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <sec>
        <title>Streaming Collection</title>
        <p>The streaming collection consists of 1.8 million tweets created by 719,000 unique accounts between October 18, 2020, and April 21, 2021. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, the number of relevant tweets in the streaming collection gradually increases from the start date. The chatter is relatively stable, with small spikes that do not often correspond to major announcements regarding vaccine research or vaccine authorization. We find this surprising, as the news usually drives the discussion on Twitter. Additionally, we observed a large spike in activity near the end of November 2020 that was not caused by any single event but rather by the increased activity of a small number of accounts.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Number of tweets over time in the streaming collection. The times of adverse events related to vaccines are marked by dashed vertical lines. Further descriptions of the news items are provided in the legend below the chart. CDC: US Centers for Disease Control and Prevention; FDA: US Food and Drug Administration.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The overwhelming majority of tweets originated from countries with predominantly English-speaking populations. Out of 1,832,333 tweets in the streaming collection, 1,245,986 (68%) originated in the United States, 229,041 (12.5%) in Great Britain, 100,778 (5.5%) in Canada, 21,987 (1.2%) in Ireland, and 20,155 (1.1%) in Australia; the rest of the tweets originated from other countries. In <xref rid="figure2" ref-type="fig">Figure 2</xref>, we show the geographical distribution of tweets in the United States. As expected, states with a large population, such as California, Texas, Florida, and New York, have more tweets in absolute terms (<xref rid="figure2" ref-type="fig">Figure 2</xref>, top). The number of tweets normalized by state population is depicted in <xref rid="figure2" ref-type="fig">Figure 2</xref> (bottom), with the most tweets per capita originating from Hawaii, Alaska, and Maine, respectively.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Geographical distribution of the tweets from the streaming collection originating in the United States. The location of the tweets was inferred from the self-reported location of the account. Top: absolute number of tweets in each state; bottom: number of tweets normalized by the state population.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref ref-type="table" rid="table3">Table 3</xref> lists the top 15 most tweeted hashtags in the streaming collection. The count column represents the total number of times a hashtag appears, and the proportion column quantifies the proportion of tweets that contain a specific hashtag out of all tweets with any hashtag. Note that many tweets contain no hashtags, and many tweets with a hashtag contain more than one hashtag. In addition to the most common general hashtags that we expected to find, such as <italic>#vaccine</italic> and <italic>#covid19</italic>, we observed a high proportion of hashtags that carry strong antivaccine sentiment, such as <italic>#novaccineforme</italic>, <italic>#vaxxed</italic>, and <italic>#vaccineinjury</italic>. For example, <italic>#novaccineforme</italic> can be found in more than 25,000 tweets, accounting for 6.6% of all tweets in the streaming collection that contain any hashtags. A large set of common hashtags is related to some debunked conspiracy theories that claim there is a global plot by rich individuals to reduce the world population, often expressed through hashtags such as <italic>#depopulation</italic>, <italic>#billgatesbioterrorist</italic>, and <italic>#arrestbillgates</italic>. Another set of very frequent hashtags appears benign on the surface. Hashtags such as <italic>#learntherisk</italic> and <italic>#informedconsent</italic> appear to communicate genuine concerns about the safety of the vaccines; however, those hashtags are usually decoys and are very often used by the same accounts that strongly oppose vaccination and that otherwise often use more explicit antivaccine hashtags.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Top 15 hashtags in the streaming data set. The count is the total number of times a hashtag appears, and the proportion quantifies the proportion of tweets that contain a specific hashtag out of all tweets with a hashtag.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="370"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Hashtag</td>
                <td>Count, n</td>
                <td>Proportion (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>vaccine</td>
                <td>41,069</td>
                <td>10.66</td>
              </tr>
              <tr valign="top">
                <td>vaccines</td>
                <td>33,050</td>
                <td>8.58</td>
              </tr>
              <tr valign="top">
                <td>covid19</td>
                <td>26,616</td>
                <td>6.91</td>
              </tr>
              <tr valign="top">
                <td>novaccineforme</td>
                <td>25,642</td>
                <td>6.66</td>
              </tr>
              <tr valign="top">
                <td>learntherisk</td>
                <td>23,340</td>
                <td>6.06</td>
              </tr>
              <tr valign="top">
                <td>billgatesbioterrorist</td>
                <td>20,197</td>
                <td>5.24</td>
              </tr>
              <tr valign="top">
                <td>study</td>
                <td>20,166</td>
                <td>5.23</td>
              </tr>
              <tr valign="top">
                <td>novaccine</td>
                <td>19,410</td>
                <td>5.04</td>
              </tr>
              <tr valign="top">
                <td>mybodymychoice</td>
                <td>19,166</td>
                <td>4.97</td>
              </tr>
              <tr valign="top">
                <td>informedconsent</td>
                <td>16,578</td>
                <td>4.30</td>
              </tr>
              <tr valign="top">
                <td>depopulation</td>
                <td>15,021</td>
                <td>3.90</td>
              </tr>
              <tr valign="top">
                <td>vaxxed</td>
                <td>12,691</td>
                <td>3.29</td>
              </tr>
              <tr valign="top">
                <td>vaccineinjury</td>
                <td>12,640</td>
                <td>3.28</td>
              </tr>
              <tr valign="top">
                <td>vaccination</td>
                <td>10,873</td>
                <td>2.82</td>
              </tr>
              <tr valign="top">
                <td>arrestbillgates</td>
                <td>9991</td>
                <td>2.59</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Account Collection</title>
        <p>The account collection differs from the streaming collection, as it is focused on historical tweets from a set of accounts. The process of collecting the historical tweets is explained in more detail in the <italic>Methods</italic> section. The current account collection consists of more than 135 million tweets published by over 78,000 unique accounts, and it spans the period from March 3, 2007, to February 8, 2021. In <xref rid="figure3" ref-type="fig">Figure 3</xref>, we illustrate some of the most important statistics from this data collection. The left panel in <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the distribution of the number of tweets per account. Out of 78,954 accounts, 39,350 (49.8%) published fewer than 1500 tweets, 31,581 (40%) of the accounts have more than 2000 tweets, and 1184 (1.5%) have more than 5000 tweets. The right panel in <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the number of tweets over time. Most of the tweets originate in the year 2020, with the oldest tweet dating back to 2007. For 55,267 (70%) of the 78,954 accounts, the oldest collected tweet dates from 2020. There is a significant portion of accounts whose historical tweets date much earlier; for 14,211 (18%) of the 78,954 accounts, the earliest tweet was dated before 2018, and for 5368 (6.8%) of the accounts, the earliest tweet was dated before 2014. This relatively long-spanning collection of historical tweets at the account level may allow for a comprehensive temporal analysis of vaccine hesitancy development on Twitter over several years.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Tweets in the account collection. Left: distribution of tweets per account; right: distribution of tweets over time.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The 15 most common hashtags appearing in the account collection are displayed in <xref ref-type="table" rid="table4">Table 4</xref>. In addition to the common COVID-19–related hashtags, we observe many hashtags referring to US politics. During the period of the US 2020 presidential election and the political campaign, the accounts that appear in our collection were particularly active. Hence, we can see that many politically motivated narratives in the data originated during that period.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Top 15 hashtags in the account collection. The count is the total number of times a hashtag appears, and the proportion quantifies the proportion of tweets that contain a specific hashtag out of all tweets with a hashtag.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="370"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Hashtag</td>
                <td>Count, n</td>
                <td>Proportion (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>covid19</td>
                <td>474,481</td>
                <td>2.55</td>
              </tr>
              <tr valign="top">
                <td>endsars</td>
                <td>203,297</td>
                <td>1.09</td>
              </tr>
              <tr valign="top">
                <td>maga</td>
                <td>164,332</td>
                <td>0.88</td>
              </tr>
              <tr valign="top">
                <td>coronavirus</td>
                <td>158,574</td>
                <td>0.85</td>
              </tr>
              <tr valign="top">
                <td>trump</td>
                <td>156,262</td>
                <td>0.84</td>
              </tr>
              <tr valign="top">
                <td>stopthesteal</td>
                <td>121,069</td>
                <td>0.65</td>
              </tr>
              <tr valign="top">
                <td>trump2020</td>
                <td>115,002</td>
                <td>0.62</td>
              </tr>
              <tr valign="top">
                <td>breaking</td>
                <td>111,274</td>
                <td>0.60</td>
              </tr>
              <tr valign="top">
                <td>obamagate</td>
                <td>110,046</td>
                <td>0.59</td>
              </tr>
              <tr valign="top">
                <td>covid</td>
                <td>106,095</td>
                <td>0.57</td>
              </tr>
              <tr valign="top">
                <td>china</td>
                <td>98,026</td>
                <td>0.53</td>
              </tr>
              <tr valign="top">
                <td>oann</td>
                <td>96,943</td>
                <td>0.52</td>
              </tr>
              <tr valign="top">
                <td>antifa</td>
                <td>79,157</td>
                <td>0.43</td>
              </tr>
              <tr valign="top">
                <td>biden</td>
                <td>77,728</td>
                <td>0.42</td>
              </tr>
              <tr valign="top">
                <td>fakenews</td>
                <td>66,599</td>
                <td>0.36</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>News Sources in the Streaming Collection</title>
        <p>Vaccine hesitancy is usually fueled by misinformation originating from websites with questionable credibility. In <xref rid="figure4" ref-type="fig">Figure 4</xref>, we list the top 10 URLs that can be found in the streaming collection, and we illustrate the number of times each appears. The vast majority of those websites can be found in the Iffy+ database of low credibility sites [<xref ref-type="bibr" rid="ref20">20</xref>]. One of the most commonly shared sources is the website of an American antivaccine group called Learn The Risk; it is known for its campaigns against vaccination, which assert that vaccines are responsible for a large number of deaths of young children. It is followed by Vaccine Impact, a well-known news and information website that promotes pseudoscience; this website often shares antivaccination propaganda and promotes alternative medicine, holism, and alternative nutrition. The only website on the list with high credibility is the website of the National Center for Biotechnology Information (NCBI), which houses PubMed.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Top 10 news sources in the streaming collection. The URLs of the news aggregators and the large social platforms were omitted.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>News Sources in the Account Collection</title>
        <p>In <xref rid="figure5" ref-type="fig">Figure 5</xref>, we list the top 10 URLs that can be found in the account collection, and we illustrate the number of times each appears. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows that many far-right news media sites appear frequently in the account collection. The Gateway Pundit [<xref ref-type="bibr" rid="ref25">25</xref>], which is known for publishing falsehoods, hoaxes, and conspiracy theories, occurs more than 400,000 times. Other far-right media outlets, such as Breitbart News [<xref ref-type="bibr" rid="ref26">26</xref>] and the Epoch Times [<xref ref-type="bibr" rid="ref27">27</xref>], also appear very often. As for sources that usually fall in the group of mainstream news media sites, such as Fox News [<xref ref-type="bibr" rid="ref28">28</xref>] and the <italic>New York Post</italic> [<xref ref-type="bibr" rid="ref29">29</xref>], conspiracy spreaders selectively quote reports from these sources to increase the credibility of often false claims.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Top 10 URLs in the account collection. The URLs of the news aggregators and the large social platforms were omitted.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Political Leanings of the Antivaccination Accounts</title>
        <p>In <xref rid="figure6" ref-type="fig">Figure 6</xref>, we show the distribution of political leanings of the accounts. The political leaning of an account was estimated based on its media diet (see the <italic>Methods</italic> section). The x-axis represents the account’s political leaning and can take any value between “far left” and “far right.” The y-axis is the normalized number of accounts with a corresponding political leaning. The political leaning of the accounts engaged in the antivaccination narratives is shown in orange. We observed a bimodal distribution with a significantly higher right peak. The blue bars illustrate the distribution of the political leanings for random Twitter accounts. The random Twitter accounts are a random sample of approximately 6000 accounts from the previously published Twitter data set related to the US 2020 Presidential election by Chen et al [<xref ref-type="bibr" rid="ref30">30</xref>]. It has been previously shown that Twitter users are younger on average and more likely to vote Democrat than the general public [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. These results are not surprising, as they align with earlier studies showing that political orientation is a strong predictor of vaccine hesitancy in the United States [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Distributions of the Twitter accounts based on their political leaning and attitude toward vaccination. The political leaning of each account was calculated from its media diet. Anti-vax: antivaccination.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Clusters of Antivaccine Narratives in the Streaming Collection</title>
        <p>To obtain further insights into the provided data set, we explored the clusters of antivaccine narratives by identifying the antivaccine topics that usually co-occurred. We ran the Louvain community detection algorithm on the topic co-occurrence network, as described in the <italic>Methods</italic> section. The topic network is illustrated in <xref rid="figure7" ref-type="fig">Figure 7</xref>. We identified 3 distinct communities; all of them contained antivaccine keywords, but with different focuses on topics. The largest topic community, colored purple, focuses on debunked claims around the conspiracy narrative that the vaccine is a plot by rich people to reduce the world population. The second topic community, colored orange, mostly focuses on vaccine safety, as hashtags such as <italic>#doctorsspeakup</italic>, <italic>#vaccinesafety</italic>, and <italic>#vaccineinjury</italic> appear often. The smallest topic community, in green, contains a mixture of various hashtags that range from strongly antivaccine, such as <italic>#informedconsent</italic>, <italic>#learntherisk</italic>, and <italic>#vaxxed</italic>, to some neutral hashtags, such as <italic>#vaccine</italic>, to some provaccine hashtags, such as <italic>#vaccineswork</italic>.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>An overview of the prominent hashtags in the data set, clustered into 3 communities. The nodes are the hashtags, and the links are drawn between two hashtags that appear together in the same tweet. Clustering was performed using the Louvain algorithm. For readability, we do not show all the node labels.</p>
          </caption>
          <graphic xlink:href="publichealth_v7i11e30642_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this paper, we present a comprehensive data set consisting of tweets related to antivaccination narratives, organized in streaming and account collections. We characterized the data in several ways, including frequencies of prominent keywords, news sources, geographical location of the accounts, and political leaning of the accounts. The streaming collection consists of a random sample of tweets that contain any of the specific keywords promoting strong antivaccination sentiments. This is a common method used to collect Twitter data on vaccination hesitancy and other similar topics [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. It is well understood by academics and is often used to provide useful insights about the chatter on the web about a particular topic in a specific period. The account collection was gathered using a relatively new method of collecting Twitter data by querying the historical activities from a set of tracked accounts. This collection was made possible after Twitter introduced the Academic Research product track API. In this way, by gathering massive amounts of historical tweets, researchers can characterize individual accounts rather than populations on average. This data set will be useful for scientists interested in the demographic and psychographic characteristics of Twitter users who are prone to spreading antivaccination narratives.</p>
        <p>The news sources shared by the users in the streaming collection are predominantly websites with low credibility. However, the most shared URL is the website of the NCBI [<xref ref-type="bibr" rid="ref25">25</xref>], which is part of the United States National Library of Medicine, a branch of the National Institutes of Health. NCBI houses PubMed, the largest bibliographic database for biomedical literature. This finding can create a false impression that the tweets from the streaming collection contain information from legitimate scientific sources. When we examined the context in which those papers were shared, we discovered that most of the papers from PubMed were cited with false and misleading conclusions. Sometimes, antivaccine advocates would share legitimate scientific papers documenting rare side effects of the vaccines, while overemphasizing the observed adverse effects and calling for vaccine boycotts. Sharing a scientific study in a tweet provides an illusion of credibility. Cherry-picking desirable sentences and relying on the fact that most of the audience will not make an effort to read a scientific paper in detail is a very effective strategy for manipulation.</p>
        <p>It is often valuable to know the political affiliation of users who share antivaccine narratives. Knowing users’ position on a political spectrum can be useful in identifying their most likely moral values and possible stances toward specific societal issues. This knowledge can be used to design appropriate future messaging and campaigns. We were able to identify the political affiliation for the account collection, as we had enough tweets for each account. Accounts that share common misinformation related to vaccines often share other conspiracy narratives, usually politically charged ones. The population susceptible to such narratives strongly skews conservative [<xref ref-type="bibr" rid="ref18">18</xref>]; therefore, we expected that a large number of accounts in the account collection would be right leaning.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although the data sets give an overview of vaccine hesitancy on Twitter, potential limitations warrant some considerations. First, our streaming collection relies on a defined set of keywords. The antivaccine lingo is constantly evolving as the COVID-19 pandemic unfolds. Although we have made our best efforts to find the most representative keywords, they may not fully cover all antivaccine topics. The set of keywords we used was designed to capture the strongest antivaccine sentiments and may have missed various nuances in the multifaceted nature of vaccine hesitancy. Second, this data set should not be used to draw conclusions for the general population, as the Twitter user population is younger and more politically engaged than the general public [<xref ref-type="bibr" rid="ref31">31</xref>]; this means that our data may be biased in various ways. Additionally, the keywords used for the collection were derived from the English vocabulary, highly biasing the geographical distribution of the tweets toward the English-speaking regions of the world. Finally, to prevent the spread of misleading COVID-19 information, Twitter has enacted specific rules and policies. The accounts violating these rules and policies may be banned by Twitter, making their tweets unreachable. At the time of writing, our estimate is that more than 40% of the accounts in the streaming collection and 30% of accounts in the account collection had been either banned or deleted. With each update of the streaming data set, we expect this proportion to change.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>In addition to the streaming collection, which tracks tweets as they appear in real time, perhaps the most important contribution of this study is the account collection, a data set consisting of almost all historical tweets for a sample of users who were actively sharing antivaccination narratives. This data set can be used to provide further insights into the accounts that engage in antivaccine propaganda. Our intention in publishing this paper and data sets is to provide researchers with assets to enable further exploration of issues revolving around vaccine hesitancy and to study them through the lens of social media. The data sets collected and provided here could be useful for researchers interested in tracking the longitudinal characteristics of accounts engaging with antivaccine narratives. They can help provide better insights into the socioeconomic, political, and cultural determinants of vaccine hesitancy.</p>
      </sec>
      <sec>
        <title>Use Notes</title>
        <p>The data set is released in compliance with the Twitter Terms and Conditions and the Developer’s Agreement and Policies [<xref ref-type="bibr" rid="ref12">12</xref>]. Researchers who wish to use this data set must agree to abide by the stipulations stated in the associated license and conform to Twitter’s policies and regulations.</p>
      </sec>
      <sec>
        <title>Data Availability</title>
        <p>The data are available at GitHub [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DARPA</term>
          <def>
            <p>Defense Advanced Research Projects Agency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NCBI</term>
          <def>
            <p>National Center for Biotechnology Information</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors are grateful to the Defense Advanced Research Projects Agency (DARPA), contract W911NF-17-C-0094, for their support. The authors appreciate the support of the Annenberg Foundation.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>All authors conceived and designed the study. GM and YW collected and analyzed the data. All authors wrote and revised the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jacobson</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>St Sauver</surname>
              <given-names>Jennifer L</given-names>
            </name>
            <name name-style="western">
              <surname>Finney Rutten</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Vaccine hesitancy</article-title>
          <source>Mayo Clin Proc</source>
          <year>2015</year>
          <month>11</month>
          <volume>90</volume>
          <issue>11</issue>
          <fpage>1562</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2015.09.006</pub-id>
          <pub-id pub-id-type="medline">26541249</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(15)00719-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Vaccine hesitancy: a growing challenge for immunization programmes</article-title>
          <source>World Health Organization</source>
          <year>2015</year>
          <access-date>2021-11-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/news/item/18-08-2015-vaccine-hesitancy-a-growing-challenge-for-immunization-programmes">https://www.who.int/news/item/18-08-2015-vaccine-hesitancy-a-growing-challenge-for-immunization-programmes</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Butler</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>MacDonald</surname>
              <given-names>NE</given-names>
            </name>
            <collab>SAGE Working Group on Vaccine Hesitancy</collab>
          </person-group>
          <article-title>Diagnosing the determinants of vaccine hesitancy in specific subgroups: The Guide to Tailoring Immunization Programmes (TIP)</article-title>
          <source>Vaccine</source>
          <year>2015</year>
          <month>08</month>
          <day>14</day>
          <volume>33</volume>
          <issue>34</issue>
          <fpage>4176</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0264-410X(15)00502-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2015.04.038</pub-id>
          <pub-id pub-id-type="medline">25896376</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(15)00502-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Freimuth</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>An</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hancock</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Musa</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Exploring racial influences on flu vaccine attitudes and behavior: results of a national survey of White and African American adults</article-title>
          <source>Vaccine</source>
          <year>2017</year>
          <month>02</month>
          <day>22</day>
          <volume>35</volume>
          <issue>8</issue>
          <fpage>1167</fpage>
          <lpage>1174</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28126202"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2016.12.046</pub-id>
          <pub-id pub-id-type="medline">28126202</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(16)31271-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5839483</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>An</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hancock</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Freimuth</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Measuring vaccine hesitancy, confidence, trust and flu vaccine uptake: results of a national survey of White and African American adults</article-title>
          <source>Vaccine</source>
          <year>2019</year>
          <month>02</month>
          <day>21</day>
          <volume>37</volume>
          <issue>9</issue>
          <fpage>1168</fpage>
          <lpage>1173</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/J.VACCINE.2019.01.033"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2019.01.033</pub-id>
          <pub-id pub-id-type="medline">30709722</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(19)30096-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bohannon</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Exploring the reasons behind parental refusal of vaccines</article-title>
          <source>J Pediatr Pharmacol Ther</source>
          <year>2016</year>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>104</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27199617"/>
          </comment>
          <pub-id pub-id-type="doi">10.5863/1551-6776-21.2.104</pub-id>
          <pub-id pub-id-type="medline">27199617</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869767</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burki</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Vaccine misinformation and social media</article-title>
          <source>Lancet Digit Health</source>
          <year>2019</year>
          <month>10</month>
          <volume>1</volume>
          <issue>6</issue>
          <fpage>e258</fpage>
          <lpage>e259</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S2589-7500(19)30136-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(19)30136-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>AlKulaib</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Weaponized health communication: Twitter bots and Russian trolls amplify the vaccine debate</article-title>
          <source>Am J Public Health</source>
          <year>2018</year>
          <month>10</month>
          <volume>108</volume>
          <issue>10</issue>
          <fpage>1378</fpage>
          <lpage>1384</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ajph.aphapublications.org/doi/10.2105/AJPH.2018.304567"/>
          </comment>
          <pub-id pub-id-type="doi">10.2105/AJPH.2018.304567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roozenbeek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dryhurst</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kerr</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Recchia</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>van der Bles</surname>
              <given-names>Anne Marthe</given-names>
            </name>
            <name name-style="western">
              <surname>van der Linden</surname>
              <given-names>Sander</given-names>
            </name>
          </person-group>
          <article-title>Susceptibility to misinformation about COVID-19 around the world</article-title>
          <source>R Soc Open Sci</source>
          <year>2020</year>
          <month>10</month>
          <volume>7</volume>
          <issue>10</issue>
          <fpage>201199</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://royalsocietypublishing.org/doi/10.1098/rsos.201199?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1098/rsos.201199</pub-id>
          <pub-id pub-id-type="medline">33204475</pub-id>
          <pub-id pub-id-type="pii">rsos201199</pub-id>
          <pub-id pub-id-type="pmcid">PMC7657933</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Velásquez</surname>
              <given-names>Nicolas</given-names>
            </name>
            <name name-style="western">
              <surname>Restrepo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Leahy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>El Oud</surname>
              <given-names>Sara</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Minzhang</given-names>
            </name>
            <name name-style="western">
              <surname>Manrique</surname>
              <given-names>Pedro</given-names>
            </name>
            <name name-style="western">
              <surname>Wuchty</surname>
              <given-names>Stefan</given-names>
            </name>
            <name name-style="western">
              <surname>Lupu</surname>
              <given-names>Yonatan</given-names>
            </name>
          </person-group>
          <article-title>The online competition between pro- and anti-vaccination views</article-title>
          <source>Nature</source>
          <year>2020</year>
          <month>06</month>
          <volume>582</volume>
          <issue>7811</issue>
          <fpage>230</fpage>
          <lpage>233</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41586-020-2281-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41586-020-2281-1</pub-id>
          <pub-id pub-id-type="medline">32499650</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-020-2281-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DeVerna</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pierri</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Truong</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bollenbacher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Axelrod</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Loynes</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Torres-Lugo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Menczer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bryden</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>CoVaxxy: a global collection of English-language Twitter posts about COVID-19 vaccines</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on January 19, 2021.</comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <article-title>Developer agreement and policy 2021</article-title>
          <source>Twitter Developer Platform</source>
          <access-date>2021-09-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en/developer-terms/agreement-and-policy">https://developer.twitter.com/en/developer-terms/agreement-and-policy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Tracking social media discourse about the COVID-19 pandemic: development of a public coronavirus Twitter data set</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>29</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19273</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19273/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19273</pub-id>
          <pub-id pub-id-type="medline">32427106</pub-id>
          <pub-id pub-id-type="pii">v6i2e19273</pub-id>
          <pub-id pub-id-type="pmcid">PMC7265654</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lamsal</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Coronavirus (COVID-19) tweets dataset</article-title>
          <source>IEEE Data Port</source>
          <year>2020</year>
          <access-date>2021-11-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21227/781w-ef42">https://doi.org/10.21227/781w-ef42</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Muric</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>AvaxTweets dataset</article-title>
          <source>GitHub</source>
          <access-date>2021-05-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/gmuric/avax-tweets-dataset">https://github.com/gmuric/avax-tweets-dataset</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bovet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Makse</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Influence of fake news in Twitter during the 2016 US presidential election</article-title>
          <source>Nat Commun</source>
          <year>2019</year>
          <month>01</month>
          <day>02</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>7</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-018-07761-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-018-07761-2</pub-id>
          <pub-id pub-id-type="medline">30602729</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-018-07761-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6315042</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Badawy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Who falls for online political manipulation?</article-title>
          <source>Companion Proceedings of The 2019 World Wide Web Conference</source>
          <year>2019</year>
          <month>05</month>
          <conf-name>WWW '19: The Web Conference</conf-name>
          <conf-date>May 13-17, 2019</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>162</fpage>
          <lpage>168</lpage>
          <pub-id pub-id-type="doi">10.1145/3308560.3316494</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Muric</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Characterizing social media manipulation in the 2020 U.S. presidential election</article-title>
          <source>First Monday</source>
          <year>2020</year>
          <month>10</month>
          <day>19</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.5210/fm.v25i11.11431"/>
          </comment>
          <pub-id pub-id-type="doi">10.5210/fm.v25i11.11431</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <source>AllSides</source>
          <access-date>2021-05-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.allsides.com/unbiased-balanced-news">https://www.allsides.com/unbiased-balanced-news</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>Iffy+ mis/disinfo sites</article-title>
          <source>Iffy</source>
          <access-date>2021-05-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iffy.news/iffy-plus/">https://iffy.news/iffy-plus/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <source>Media Bias/Fact Check</source>
          <access-date>2021-05-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mediabiasfactcheck.com/">https://mediabiasfactcheck.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <article-title>URL shortener</article-title>
          <source>Bitly</source>
          <access-date>2021-05-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bitly.com/">https://bitly.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>SMAPPNYU/urlExpander: initial release 2018</article-title>
          <source>Zenodo</source>
          <access-date>2021-11-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.5281/zenodo.1345144">https://doi.org/10.5281/zenodo.1345144</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Guillaume</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lambiotte</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lefebvre</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Fast unfolding of communities in large networks</article-title>
          <source>J Stat Mech</source>
          <year>2008</year>
          <month>10</month>
          <day>09</day>
          <volume>2008</volume>
          <issue>10</issue>
          <fpage>P10008</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1088/1742-5468/2008/10/P10008"/>
          </comment>
          <pub-id pub-id-type="doi">10.1088/1742-5468/2008/10/p10008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>The Gateway Pundit</source>
          <access-date>2021-05-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.thegatewaypundit.com/">https://www.thegatewaypundit.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>Breitbart News Network</source>
          <access-date>2021-05-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.breitbart.com/">https://www.breitbart.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <source>The Epoch Times</source>
          <access-date>2021-05-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theepochtimes.com/">https://www.theepochtimes.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <source>Fox News</source>
          <access-date>2021-05-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.foxnews.com/">https://www.foxnews.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <source>New York Post</source>
          <access-date>2021-05-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nypost.com/">https://nypost.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Deb</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>#Election2020: the first public Twitter dataset on the 2020 US Presidential election</article-title>
          <source>J Comput Soc Sci</source>
          <year>2021</year>
          <month>04</month>
          <day>02</day>
          <fpage>1</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33824934"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s42001-021-00117-9</pub-id>
          <pub-id pub-id-type="medline">33824934</pub-id>
          <pub-id pub-id-type="pii">117</pub-id>
          <pub-id pub-id-type="pmcid">PMC8017518</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wojcik</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Sizing Up Twitter Users</article-title>
          <source>Pew Research Center</source>
          <year>2019</year>
          <month>04</month>
          <day>24</day>
          <access-date>2021-05-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2019/04/24/sizing-up-twitter-users/">https://www.pewresearch.org/internet/2019/04/24/sizing-up-twitter-users/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eady</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nagler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guess</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zilinsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How many people live in political bubbles on social media? Evidence from linked survey and Twitter data</article-title>
          <source>SAGE Open</source>
          <year>2019</year>
          <month>02</month>
          <day>28</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>2158244019832705</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1177/2158244019832705"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2158244019832705</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fridman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gershon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gneezy</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 and vaccine hesitancy: a longitudinal study</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>e0250123</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0250123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0250123</pub-id>
          <pub-id pub-id-type="medline">33861765</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-35660</pub-id>
          <pub-id pub-id-type="pmcid">PMC8051771</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruiz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bell</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Predictors of intention to vaccinate against COVID-19: results of a nationwide survey</article-title>
          <source>Vaccine</source>
          <year>2021</year>
          <month>02</month>
          <day>12</day>
          <volume>39</volume>
          <issue>7</issue>
          <fpage>1080</fpage>
          <lpage>1086</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33461833"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2021.01.010</pub-id>
          <pub-id pub-id-type="medline">33461833</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(21)00014-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7794597</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Seltzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Tracking mental health and symptom mentions on Twitter during COVID-19</article-title>
          <source>J Gen Intern Med</source>
          <year>2020</year>
          <volume>35</volume>
          <fpage>2798</fpage>
          <lpage>2800</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/S11606-020-05988-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-020-05988-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gebali</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>COVID-19-FAKES: a Twitter (Arabic/English) dataset for detecting misleading information on COVID-19</article-title>
          <source>INCoS 2020. Advances in Intelligent Systems and Computing, vol 1263</source>
          <year>2021</year>
          <conf-name>The 12th International Conference on Intelligent Networking and Collaborative Systems (INCoS-2020)</conf-name>
          <conf-date>August 31-September 2, 2020</conf-date>
          <conf-loc>Victoria, BC</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-57796-4_25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gargiulo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cafiero</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Guille-Escuret</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Seror</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Asymmetric participation of defenders and critics of vaccines to debates on French-speaking Twitter</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>04</month>
          <day>20</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>6599</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-62880-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-62880-5</pub-id>
          <pub-id pub-id-type="medline">32313016</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-62880-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7171088</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Surian</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perry</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kelaher</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Comparing human papillomavirus vaccine concerns on Twitter: a cross-sectional study of users in Australia, Canada and the UK</article-title>
          <source>BMJ Open</source>
          <year>2017</year>
          <month>10</month>
          <day>05</day>
          <volume>7</volume>
          <issue>10</issue>
          <fpage>e016869</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=28982821"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2017-016869</pub-id>
          <pub-id pub-id-type="medline">28982821</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2017-016869</pub-id>
          <pub-id pub-id-type="pmcid">PMC5640044</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Surian</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Characterizing Twitter discussions about HPV vaccines using topic modeling and community detection</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>08</month>
          <day>29</day>
          <volume>18</volume>
          <issue>8</issue>
          <fpage>e232</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/8/e232/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6045</pub-id>
          <pub-id pub-id-type="medline">27573910</pub-id>
          <pub-id pub-id-type="pii">v18i8e232</pub-id>
          <pub-id pub-id-type="pmcid">PMC5020315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Featherstone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Barnett</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ruiz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Millam</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Exploring childhood anti-vaccine and pro-vaccine communities on Twitter – a perspective from influential users</article-title>
          <source>Online Soc Netw Media</source>
          <year>2020</year>
          <month>11</month>
          <volume>20</volume>
          <fpage>100105</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/J.OSNEM.2020.100105"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.osnem.2020.100105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gunaratne</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Coomes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Haghbayan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Temporal trends in anti-vaccine discourse on Twitter</article-title>
          <source>Vaccine</source>
          <year>2019</year>
          <month>08</month>
          <day>14</day>
          <volume>37</volume>
          <issue>35</issue>
          <fpage>4867</fpage>
          <lpage>4871</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.vaccine.2019.06.086"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2019.06.086</pub-id>
          <pub-id pub-id-type="medline">31300292</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(19)30876-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tomeny</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Vargo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>El-Toukhy</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Geographic and demographic correlates of autism-related anti-vaccine beliefs on Twitter, 2009-15</article-title>
          <source>Soc Sci Med</source>
          <year>2017</year>
          <month>10</month>
          <volume>191</volume>
          <fpage>168</fpage>
          <lpage>175</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28926775"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2017.08.041</pub-id>
          <pub-id pub-id-type="medline">28926775</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(17)30522-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5623105</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
