<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="brief-report" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v1i1e5</article-id>
      <article-id pub-id-type="pmid">27014744</article-id>
      <article-id pub-id-type="doi">10.2196/publichealth.4472</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Short Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Short Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Social Media to Perform Local Influenza Surveillance in an Inner-City Hospital: A Retrospective Observational Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mciver</surname>
            <given-names>David</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1" corresp="yes">
          <name name-style="western">
            <surname>Broniatowski</surname>
            <given-names>David Andre</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Engineering Management and Systems Engineering</institution>
            <institution>The George Washington University</institution>
            <addr-line>Science and Engineering Hall</addr-line>
            <addr-line>800 22nd Street NW, #2700</addr-line>
            <addr-line>Washington, DC, 20052</addr-line>
            <country>United States</country>
            <phone>1 2029943751</phone>
            <fax>1 2029943751</fax>
            <email>broniatowski@gwu.edu</email>
          </address>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3302-9497</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2">
          <name name-style="western">
            <surname>Dredze</surname>
            <given-names>Mark</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-0422-2474</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3">
          <name name-style="western">
            <surname>Paul</surname>
            <given-names>Michael J</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9149-7539</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib4">
          <name name-style="western">
            <surname>Dugas</surname>
            <given-names>Andrea</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7164-014X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <sup>1</sup>
        <institution>Department of Engineering Management and Systems Engineering</institution>
        <institution>The George Washington University</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <sup>2</sup>
        <institution>Human Language Technology Center of Excellence</institution>
        <institution>Johns Hopkins University</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <sup>3</sup>
        <institution>Department of Computer Science</institution>
        <institution>Johns Hopkins University</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <sup>4</sup>
        <institution>Department of Emergency Medicine</institution>
        <institution>Johns Hopkins University</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: David Andre Broniatowski 
        <email>broniatowski@gwu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jan-Jun</season>
        <year>2015</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>05</month>
        <year>2015</year>
      </pub-date>
      <volume>1</volume>
      <issue>1</issue>
      <elocation-id>e5</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>25</day>
          <month>03</month>
          <year>2015</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>04</month>
          <year>2015</year>
        </date>
        <date date-type="rev-recd">
          <day>04</day>
          <month>05</month>
          <year>2015</year>
        </date>
        <date date-type="accepted">
          <day>05</day>
          <month>05</month>
          <year>2015</year>
        </date>
      </history>
      <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
      <copyright-statement>©David Andre Broniatowski, Mark Dredze, Michael J Paul, Andrea Dugas. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 29.05.2015.</copyright-statement>
      <copyright-year>2015</copyright-year>
      <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://publichealth.jmir.org/2015/1/e5/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Public health officials and policy makers in the United States expend significant resources at the national, state, county, and city levels to measure the rate of influenza infection. These individuals rely on influenza infection rate information to make important decisions during the course of an influenza season driving vaccination campaigns, clinical guidelines, and medical staffing. Web and social media data sources have emerged as attractive alternatives to supplement existing practices. While traditional surveillance methods take 1-2 weeks, and significant labor, to produce an infection estimate in each locale, web and social media data are available in near real-time for a broad range of locations.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The objective of this study was to analyze the efficacy of flu surveillance from combining data from the websites Google Flu Trends and HealthTweets at the local level. We considered both emergency department influenza-like illness cases and laboratory-confirmed influenza cases for a single hospital in the City of Baltimore.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This was a retrospective observational study comparing estimates of influenza activity of Google Flu Trends and Twitter to actual counts of individuals with laboratory-confirmed influenza, and counts of individuals presenting to the emergency department with influenza-like illness cases. Data were collected from November 20, 2011 through March 16, 2014. Each parameter was evaluated on the municipal, regional, and national scale. We examined the utility of social media data for tracking actual influenza infection at the municipal, state, and national levels. Specifically, we compared the efficacy of Twitter and Google Flu Trends data.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that municipal-level Twitter data was more effective than regional and national data when tracking actual influenza infection rates in a Baltimore inner-city hospital. When combined, national-level Twitter and Google Flu Trends data outperformed each data source individually. In addition, influenza-like illness data at all levels of geographic granularity were best predicted by national Google Flu Trends data.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>In order to overcome sensitivity to transient events, such as the news cycle, the best-fitting Google Flu Trends model relies on a 4-week moving average, suggesting that it may also be sacrificing sensitivity to transient fluctuations in influenza infection to achieve predictive power. Implications for influenza forecasting are discussed in this report.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>Web mining</kwd>
        <kwd>social computing</kwd>
        <kwd>time series analysis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Public health officials and policy makers rely on influenza infection rate information to make important decisions during the course of an influenza season. Whereas influenza surveillance has traditionally been conducted using laboratory data, hospitalizations, and physician visits for influenza-like illness (ILI), web and social media data sources have emerged as attractive alternatives to supplement existing practices. While traditional surveillance methods take 1-2 weeks, and significant labor, to produce an infection estimate in each locale, web and social media data are available in near real-time for a broad range of locations. Studies have demonstrated that web queries [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>], Twitter messages [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref12">12</xref>], and other sources (eg, Wikipedia [<xref ref-type="bibr" rid="ref13">13</xref>], mobile app reporting [<xref ref-type="bibr" rid="ref14">14</xref>]) may be productively mined for influenza surveillance data. New resources like Google Flu Trends [<xref ref-type="bibr" rid="ref1">1</xref>], HealthTweets [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>] (<xref ref-type="fig" rid="figure1">Figure 1</xref>), and Flu Near You [<xref ref-type="bibr" rid="ref14">14</xref>] deliver near real-time estimates of infection rates.</p>
      <p>However, few have examined the efficacy of local surveillance [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. In this study, we analyzed the efficacy of local flu surveillance from Google Flu Trends and HealthTweets. Whereas previous studies considered either Google or Twitter in isolation, we evaluated multiple trends available from both. Furthermore, instead of restricting our study to hospitals designated as ILI sentinels, or emergency department ILI rates, we considered both emergency department ILI and laboratory-confirmed influenza cases for a single hospital in the city of Baltimore. This enabled us to evaluate the impact on specific care centers when making influenza response decisions, such as staffing and resource allocation.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Screenshot of HealthTweets.</p>
        </caption>
        <graphic xlink:href="publichealth_v1i1e5_fig1.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Population and Setting</title>
        <p>This was a retrospective observational study comparing estimates of influenza activity from Google Flu Trends and Twitter to actual counts of individuals with laboratory-confirmed influenza, and counts of individuals presenting to the emergency department with ILI. Each parameter was evaluated on the municipal, regional, and national scale.</p>
      </sec>
      <sec>
        <title>Data Collection and Methods of Measurement</title>
        <p>Data were collected from November 20, 2011 through March 16, 2014. All measurements were recorded weekly to allow for direct comparison between data sources. Following the Centers for Disease Control (CDC) Convention, each week summed the data points from Sunday through the following Saturday. The number of municipal- (city) level subjects was estimated by evaluating the number of patients presenting to an urban academic emergency department in Baltimore, Maryland with an annual volume of over 60,000 adult and 24,000 pediatric visits. The number of confirmed influenza cases was determined by summing the number of emergency department visits with laboratory-confirmed influenza that occurred during each week. Similarly, the number of patients with ILI was determined by summing the number of emergency department patients who reported fever with cough or sore throat each week. Regional data were collected via the CDC surveillance reports for Health and Human Services (HHS) Region 3, including both the percentage of patients reporting ILI and the percentage of tests positive for influenza. National data were collected from the CDC surveillance report of the nationwide percentage of patients reporting ILI and the total percentage of patients testing positive for influenza.</p>
        <p>Google Flu Trends data for the United States, the state of Maryland, and the city of Baltimore were downloaded directly from the Google Flu Trends website [<xref ref-type="bibr" rid="ref19">19</xref>]. Twitter data for the same three locations was obtained from the HealthTweets website [<xref ref-type="bibr" rid="ref15">15</xref>], an online platform for public health surveillance aimed at sharing the latest research results on Twitter data with the scientific community and public officials. The underlying data were generated using a sequence of supervised machine-learning algorithms [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref12">12</xref>], namely logistic regression classifiers, the first of which identified tweets that were relevant to health. Next, tweets that were about influenza were isolated. The final classifier separated tweets that were about reported influenza infection from those that only reported awareness of the flu. The tweets indicating influenza infection constituted our dataset. Message locations were identified using Carmen [<xref ref-type="bibr" rid="ref20">20</xref>], a software package that infers tweet locations using Global Positioning System (GPS) coordinates and self-reported locations from the free text of the user biographic profiles.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>Data were analyzed by evaluating weekly trends over time using the Box-Jenkins procedure [<xref ref-type="bibr" rid="ref21">21</xref>] applied to each data source (influenza tests at our medical center, ILI at our medical center, % reported flu cases in HHS region 3 and the USA, and % reported ILI in HHS region 3 and the USA) in order to control for autocorrelation in the corresponding time series. We next fit an autoregressive integrated moving average model with exogenous covariates (ARIMAX) to each data time series, X<sub>t</sub>, where p, d, and q are the respective autoregressive, differencing, and moving average orders of the model (<xref ref-type="fig" rid="figure2">Figure 2</xref>, part a). The φ<sub>i</sub> and θ<sub>i</sub> are the autoregressive and moving average parameters, respectively, ε<sub>t</sub> is a normally distributed error term with a mean of 0, L is a lag operator defined as in <xref ref-type="fig" rid="figure2">Figure 2</xref>, part b, and m<sub>t</sub> is defined as in <xref ref-type="fig" rid="figure2">Figure 2</xref>, part c, where y<sub>t</sub> is a series of predictors (eg, Twitter and/or Google Flu Trends data), the η<sub>i</sub> are a series of predictor weights, and b is the total number of predictor time series.</p>
        <p>We chose the autoregressive, differencing, and moving average terms of each model that minimized its Akaike Information Criterion (AIC) subject to the constraint that each model used the same degree of differencing for each data source. This constraint was imposed to enable comparison across social media predictors (ie, Twitter, Google Flu Trends, or both). All statistical analyses were conducted using the R Project for Statistical Computing, version 3.0.2 (The R Foundation for Statistical Computing). Specifically, we used the “arima()” function in the forecast package [<xref ref-type="bibr" rid="ref22">22</xref>]. Parameter selection was informed by the “auto.arima()” function, using the Hyndman and Khandakar algorithm [<xref ref-type="bibr" rid="ref23">23</xref>]. Deviations from the algorithm’s output were then examined by hand and parameters that deviated from algorithm output were chosen if they minimized AIC.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Equations defining the ARIMAX model.</p>
          </caption>
          <graphic xlink:href="publichealth_v1i1e5_fig2.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table1">Table 1</xref> summarizes the results of each ARIMA model incorporating Twitter and Google Flu Trends data. Our results show that Baltimore-area Twitter data provided a better estimate of actual influenza cases reported in the Baltimore metropolitan area when compared to state- and national-level Twitter data (see <xref ref-type="fig" rid="figure3">Figure 3</xref>). Furthermore, a combination of Twitter and Google Flu Trends data sources outperformed either Twitter or Google Flu Trends individually when predicting actual influenza outbreaks at municipal and regional levels.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Log-likelihood (AIC<sup>a</sup>) for each surveillance method.</p>
        </caption>
        <table width="1000" border="0" cellpadding="7" cellspacing="0" rules="groups" frame="hsides">
          <col width="20"/>
          <col width="60"/>
          <col width="80"/>
          <col width="80"/>
          <col width="80"/>
          <col width="80"/>
          <col width="80"/>
          <col width="80"/>
          <thead>
            <tr valign="top">
              <td><break/></td>
              <td><break/></td>
              <td colspan="3">Laboratory-confirmed influenza</td>
              <td colspan="3">Influenza-like illness (ILI)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td><break/></td>
              <td>City</td>
              <td>Region</td>
              <td>US</td>
              <td>City</td>
              <td>Region</td>
              <td>US</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="bottom">
              <td colspan="2"><bold>Twitter</bold><sup>b</sup></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>US<sup>c</sup></td>
              <td>-311 (627)<sup>0,1,0e</sup></td>
              <td>-317<sup>g</sup>(653)<sup>5,1,3</sup></td>
              <td>-235<sup>g</sup>(484)<sup>0,1,5</sup></td>
              <td>-502<sup>g</sup>(1009)<sup>0,2,1</sup></td>
              <td>-66<sup>g</sup>(143)<sup>0,1,0</sup></td>
              <td>-27<sup>g</sup>(61)<sup>1,1,1</sup></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>MD<sup>d</sup></td>
              <td>-310 (624)<sup>0,1,0</sup></td>
              <td>-321 (661)<sup>5,1,3</sup></td>
              <td>-236 (486)<sup>0,1,5</sup></td>
              <td>-503 (1012)<sup>0,1,0</sup></td>
              <td>-70 (144)<sup>0,1,0</sup></td>
              <td>-30 (68)<sup>1,1,1</sup></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>Baltimore</td>
              <td>-308<sup>g</sup>(620)<sup>0,1,0</sup></td>
              <td>-323 (666)<sup>5,1,3</sup></td>
              <td>-235 (484)<sup>0,1,5</sup></td>
              <td>-504 (1013)<sup>0,2,1</sup></td>
              <td>-74 (158)<sup>0,1,3</sup></td>
              <td>-32 (74)<sup>1,1,1</sup></td>
            </tr>
            <tr valign="top">
              <td colspan="4"><bold>Google Flu Trends</bold></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>US</td>
              <td>-291<sup>g</sup>(596)<sup>1,1,4</sup></td>
              <td>-313<sup>g</sup>(648)<sup>5,1,4</sup></td>
              <td>-230<sup>f,g</sup>(475)<sup>0,1,5</sup></td>
              <td>-494<sup>f,g</sup>(1002)<sup>1,2,4</sup></td>
              <td>-49<sup>f,g</sup>(110)<sup>0,1,4</sup></td>
              <td>-1<sup>f,g</sup>(15)<sup>1,1,4</sup></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>MD</td>
              <td>-299 (612)<sup>1,1,4</sup></td>
              <td>-318 (656)<sup>5,1,3</sup></td>
              <td>-236 (486)<sup>0,1,5</sup></td>
              <td>-498 (1010)<sup>1,2,4</sup></td>
              <td>-58 (129)<sup>0,1,4</sup></td>
              <td>-27 (61)<sup>1,1,1</sup></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>Baltimore</td>
              <td>-295 (604)<sup>1,1,4</sup></td>
              <td>-320 (660)<sup>5,1,3</sup></td>
              <td>-236 (486)<sup>0,1,5</sup></td>
              <td>-495 (1005)<sup>1,2,4</sup></td>
              <td>-60 (132)<sup>0,1,4</sup></td>
              <td>-23 (56)<sup>1,1,2</sup></td>
            </tr>
            <tr valign="top">
              <td colspan="2"><bold>Both</bold></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
              <td><break/></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>US</td>
              <td>-289<sup>f,g</sup>(594)<sup>1,1,4</sup></td>
              <td>-312<sup>f,g</sup>(646)<sup>5,1,3</sup></td>
              <td>-230<sup>g</sup>(477)<sup>0,1,5</sup></td>
              <td>-495<sup>g</sup>(1003)<sup>0,1,4</sup></td>
              <td>-49<sup>g</sup>(112)<sup>0,1,4</sup></td>
              <td>-0<sup>g</sup>(17)<sup>1,1,4</sup></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>MD</td>
              <td>-299 (613)<sup>1,1,4</sup></td>
              <td>-318 (657)<sup>5,1,3</sup></td>
              <td>-235 (485)<sup>0,1,5</sup></td>
              <td>-498 (1011)<sup>1,2,4</sup></td>
              <td>-58 (130)<sup>0,1,4</sup></td>
              <td>-27 (68)<sup>1,1,1</sup></td>
            </tr>
            <tr valign="bottom">
              <td><break/></td>
              <td>Baltimore</td>
              <td>-294 (604)<sup>1,1,4</sup></td>
              <td>-319 (659)<sup>5,1,3</sup></td>
              <td>-235 (486)<sup>0,1,5</sup></td>
              <td>-500 (1007)<sup>0,2,1</sup></td>
              <td>-60 (134)<sup>0,1,4</sup></td>
              <td>-22 (55)<sup>1,1,2</sup></td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup> <italic>a</italic> </sup>AIC=Akaike Information Criterion</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>Twitter data from the HealthTweets website.</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>US=United States</p>
          </fn>
          <fn id="table1fn4">
            <p><sup>d</sup>MD=Maryland</p>
          </fn>
          <fn id="table1fn5">
            <p><sup>e</sup>Superscript numerals indicate the autoregressive order, the order of differencing, and the moving average order, respectively. Models were chosen to minimize AIC, guided by examinations of autocorrelation and partial autocorrelation values.</p>
          </fn>
          <fn id="table1fn6">
            <p><sup>f</sup>The best predictor across all data sources.</p>
          </fn>
          <fn id="table1fn7">
            <p><sup>g</sup>The best predictor within each data source (HealthTweets website, Google, or a linear combination of both).</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Plot of weekly confirmed influenza cases (right axis) as compared to standardized Baltimore social media data (left axis).</p>
        </caption>
        <graphic xlink:href="publichealth_v1i1e5_fig3.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>When directly comparing models that rely only on one data source (ie, Twitter or Google Flu Trends but not both), we found that the best-fitting Twitter models were simple whereas the best-fitting Google Flu Trends models generally required more parameters. For example, at the municipal level, the best-fitting Twitter model did not require any autoregressive or moving average terms, whereas the best-fitting Google Flu Trends model required a 4-week moving average of Google Flu Trends data and an autoregressive term. In general, these more complex Google Flu Trends models outperformed the best-fitting Twitter models. Although these Google Flu Trends models were significantly more complex (ie, one must fit more parameters), they had a lower AIC, indicating that they were also more informative.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Consistent with prior work [<xref ref-type="bibr" rid="ref18">18</xref>], we found that national-level Google Flu Trends data may be used to track actual influenza cases in the Baltimore area. The fact that a combination of Twitter and Google Flu Trends data at the national (US) level outperformed all other data sources for local and regional confirmed influenza cases indicates that these data sources are not redundant and that Twitter data are contributing information useful to influenza surveillance that are not captured by the corresponding Google Flu Trends data.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Whereas prior work using Google Flu Trends data has largely focused on US ILI data, we extended this finding to multiple levels of geographic granularity by examining social media surveillance at the regional and city levels as well. We found that US Google Flu Trends data best explained ILI rates at all levels (including the municipal level, see <xref ref-type="fig" rid="figure4">Figure 4</xref>). This contrasts with prior research, which found that Google Flu Trends data conflated signals of influenza awareness (eg, media attention) with signals of actual infection - overestimating the flu season’s peak prevalence. In addition, this prior work found that there was insufficient control for temporal autocorrelation and a lack of analysis of Google Flu Trends data at local, rather than national, levels [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Plot of weekly influenza-like illness cases (right axis) as compared to standardized US social media data (left axis).</p>
          </caption>
          <graphic xlink:href="publichealth_v1i1e5_fig4.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In this study, we controlled for autocorrelation and exogenous temporal factors using an ARIMAX model. The improved performance of this model might be an indication that the 4-week moving average terms are smoothing out fluctuations due to the news cycle. Nevertheless, because Google Flu Trends data do not explicitly differentiate between signals of influenza awareness and actual infection, this relatively complicated model may buy accuracy at the cost of sensitivity to transient phenomena. Thus, temporary spikes in media coverage are smoothed out, but so would temporary spikes in influenza infection.</p>
        <p>Elsewhere, we have shown that our Twitter data overcome the limitations identified in prior Google Flu Trends studies by filtering out signals of influenza awareness from signals of actual infection and enabling analysis at multiple levels of geographic granularity [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Furthermore, the fact that the Twitter model is more lightweight means that it is more able to correctly track transient increases in infection when they occur [<xref ref-type="bibr" rid="ref12">12</xref>]. Finally, municipal-level Twitter data provided a better account of actual influenza cases in Baltimore than did state- or national-level data. This finding is consistent with prior work [<xref ref-type="bibr" rid="ref12">12</xref>] showing that local Twitter data does contribute information that is useful for municipal surveillance. In contrast, state- and local-level Google Flu Trends data did not improve surveillance when compared to national Google Flu Trends data.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One limitation of our approach is that it only relies upon one municipality. Furthermore, our analysis only examined three seasons of influenza data, one of which (the 2012-2013 season) is known to have been anomalous. Future work should therefore focus on incorporating data from multiple influenza seasons.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Overall, our results motivate the need for future work examining how social media may be used to track measures relevant to influenza surveillance in multiple different locations and seasons.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AIC</term>
          <def>
            <p>Akaike information criterion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ARIMA</term>
          <def>
            <p>Autoregressive integrated moving average</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HHS</term>
          <def>
            <p>Health and Human Services</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ILI</term>
          <def>
            <p>Influenza-like illness</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>DA Broniatowski and M Dredze were supported in part by the National Institutes of Health under award number 1R01GM114771-01. MJ Paul was supported by a PhD fellowship from Microsoft Research.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>M Dredze and MJ Paul serve on the advisory board of SickWeather. There are no other conflicts of interest.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ginsberg</surname>
              <given-names>Jeremy</given-names>
            </name>
            <name name-style="western">
              <surname>Mohebbi</surname>
              <given-names>Matthew H</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>Rajan S</given-names>
            </name>
            <name name-style="western">
              <surname>Brammer</surname>
              <given-names>Lynnette</given-names>
            </name>
            <name name-style="western">
              <surname>Smolinski</surname>
              <given-names>Mark S</given-names>
            </name>
            <name name-style="western">
              <surname>Brilliant</surname>
              <given-names>Larry</given-names>
            </name>
          </person-group>
          <article-title>Detecting influenza epidemics using search engine query data</article-title>
          <source>Nature</source>
          <year>2009</year>
          <month>02</month>
          <day>19</day>
          <volume>457</volume>
          <issue>7232</issue>
          <fpage>1012</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1038/nature07634</pub-id>
          <pub-id pub-id-type="medline">19020500</pub-id>
          <pub-id pub-id-type="pii">nature07634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Polgreen</surname>
              <given-names>Philip M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Yiling</given-names>
            </name>
            <name name-style="western">
              <surname>Pennock</surname>
              <given-names>David M</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>Forrest D</given-names>
            </name>
          </person-group>
          <article-title>Using internet searches for influenza surveillance</article-title>
          <source>Clin Infect Dis</source>
          <year>2008</year>
          <month>12</month>
          <day>1</day>
          <volume>47</volume>
          <issue>11</issue>
          <fpage>1443</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cid.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=18954267"/>
          </comment>
          <pub-id pub-id-type="doi">10.1086/593098</pub-id>
          <pub-id pub-id-type="medline">18954267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Qingyu</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>Elaine O</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>Benfu</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Geng</given-names>
            </name>
            <name name-style="western">
              <surname>Chunara</surname>
              <given-names>Rumi</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>John S</given-names>
            </name>
          </person-group>
          <article-title>Monitoring influenza epidemics in China with search query from Baidu</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>e64323</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0064323"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0064323</pub-id>
          <pub-id pub-id-type="medline">23750192</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-00331</pub-id>
          <pub-id pub-id-type="pmcid">PMC3667820</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Culotta</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Towards detecting influenza epidemics by analyzing Twitter messages</article-title>
          <year>2010</year>
          <conf-name>Proc First Workshop on Social Media Analytics</conf-name>
          <conf-date>2010</conf-date>
          <conf-loc>New York, NY, USA</conf-loc>
          <fpage>115</fpage>
          <lpage>122</lpage>
          <pub-id pub-id-type="doi">10.1145/1964858.1964874</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>You are what you Tweet: Analyzing Twitter for public health</article-title>
          <year>2011</year>
          <conf-name>ICWSM</conf-name>
          <conf-date>2011</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>265</fpage>
          <lpage>272</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lampos</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Cristianini</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Nowcasting events from the social web with statistical learning</article-title>
          <source>ACM Transactions on Intelligent Systems and Technology (TIST)</source>
          <year>2012</year>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>72</fpage>
          <pub-id pub-id-type="doi">10.1145/2337542.2337557</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>How Social Media Will Change Public Health</article-title>
          <source>IEEE Intell. Syst</source>
          <year>2012</year>
          <month>07</month>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>81</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1109/MIS.2012.76</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>Cynthia</given-names>
            </name>
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>Gunther</given-names>
            </name>
          </person-group>
          <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
          <source>PLoS One</source>
          <year>2010</year>
          <volume>5</volume>
          <issue>11</issue>
          <fpage>e14118</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0014118"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
          <pub-id pub-id-type="medline">21124761</pub-id>
          <pub-id pub-id-type="pmcid">PMC2993925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salathé</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Assessing vaccination sentiments with online social media: implications for infectious disease dynamics and control</article-title>
          <source>PLoS computational biology</source>
          <year>2011</year>
          <volume>7</volume>
          <issue>10</issue>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002199</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lamb</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Separating Fact from Fear: Tracking Flu Infections on Twitter</article-title>
          <source>HLT-NAACL</source>
          <year>2013</year>
          <conf-name>HLT-NAACL</conf-name>
          <conf-date>2013</conf-date>
          <conf-loc>Atlanta, Georgia, USA</conf-loc>
          <fpage>789</fpage>
          <lpage>795</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gesualdo</surname>
              <given-names>Francesco</given-names>
            </name>
            <name name-style="western">
              <surname>Stilo</surname>
              <given-names>Giovanni</given-names>
            </name>
            <name name-style="western">
              <surname>Agricola</surname>
              <given-names>Eleonora</given-names>
            </name>
            <name name-style="western">
              <surname>Gonfiantini</surname>
              <given-names>Michaela V</given-names>
            </name>
            <name name-style="western">
              <surname>Pandolfi</surname>
              <given-names>Elisabetta</given-names>
            </name>
            <name name-style="western">
              <surname>Velardi</surname>
              <given-names>Paola</given-names>
            </name>
            <name name-style="western">
              <surname>Tozzi</surname>
              <given-names>Alberto E</given-names>
            </name>
          </person-group>
          <article-title>Influenza-like illness surveillance on Twitter through automated learning of naïve language</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e82489</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0082489"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0082489</pub-id>
          <pub-id pub-id-type="medline">24324799</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-24070</pub-id>
          <pub-id pub-id-type="pmcid">PMC3853203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>David A</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>Michael J</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>Mark</given-names>
            </name>
          </person-group>
          <article-title>National and local influenza surveillance through Twitter: an analysis of the 2012-2013 influenza epidemic</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e83672</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0083672"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0083672</pub-id>
          <pub-id pub-id-type="medline">24349542</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-35058</pub-id>
          <pub-id pub-id-type="pmcid">PMC3857320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McIver</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Wikipedia usage estimates prevalence of influenza-like illness in the United States in near real-time</article-title>
          <source>PLoS computational biology</source>
          <year>2014</year>
          <volume>10</volume>
          <issue>4</issue>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1003581</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chunara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Aman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Smolinski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Flu near you: an online self-reported influenza surveillance system in the USA</article-title>
          <source>Online Journal of Public Health Informatics</source>
          <year>2013</year>
          <volume>5</volume>
          <issue>1</issue>
          <pub-id pub-id-type="pmcid">PMC3692780</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>HealthTweets.org: A Platform for Public Health Surveillance using Twitter</article-title>
          <source>Workshops at the Twenty-Eighth AAAI Conference on Artificial Intelligence</source>
          <year>2014</year>
          <conf-name>AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Quebec City, Quebec, Canada</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <source>HealthTweets.org</source>
          <access-date>2015-05-22</access-date>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.healthtweets.org/accounts/login/?next=/">http://www.healthtweets.org/accounts/login/?next=/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6YhoN4Fak"/>
          </comment>
          <!-- <pub-id pub-id-type="other">6YhoN4Fak</pub-id> -->
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nagel</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Tsou</surname>
              <given-names>M-H</given-names>
            </name>
            <name name-style="western">
              <surname>Spitzberg</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>An</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gawron</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J-A</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peddecord</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Lindsay</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The complex relationship of realspace events and messages in cyberspace: Case study of influenza and pertussis using tweets</article-title>
          <source>JMIR</source>
          <year>2013</year>
          <volume>15</volume>
          <issue>10</issue>
          <pub-id pub-id-type="doi">10.2196/jmir.2705</pub-id>
          <pub-id pub-id-type="pmcid">PMC3841359</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>Andrea Freyer</given-names>
            </name>
            <name name-style="western">
              <surname>Jalalpour</surname>
              <given-names>Mehdi</given-names>
            </name>
            <name name-style="western">
              <surname>Gel</surname>
              <given-names>Yulia</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>Scott</given-names>
            </name>
            <name name-style="western">
              <surname>Torcaso</surname>
              <given-names>Fred</given-names>
            </name>
            <name name-style="western">
              <surname>Igusa</surname>
              <given-names>Takeru</given-names>
            </name>
            <name name-style="western">
              <surname>Rothman</surname>
              <given-names>Richard E</given-names>
            </name>
          </person-group>
          <article-title>Influenza forecasting with Google Flu Trends</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e56176</fpage>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0056176"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0056176</pub-id>
          <pub-id pub-id-type="medline">23457520</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-29961</pub-id>
          <pub-id pub-id-type="pmcid">PMC3572967</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <source>Google Flu Trends</source>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.google.org/flutrends/us/#US">https://www.google.org/flutrends/us/#US</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6YhoaMjpP"/>
          </comment>
          <!-- <pub-id pub-id-type="other">6YhoaMjpP</pub-id> -->
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bergsma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Carmen: A twitter geolocation system with applications to public health</article-title>
          <year>2013</year>
          <month>06</month>
          <conf-name>AAAI Workshop on Expanding the Boundaries of Health Informatics Using AI (HIAI)</conf-name>
          <conf-date>2013</conf-date>
          <conf-loc>Bellevue, WA</conf-loc>
          <fpage>20</fpage>
          <lpage>24</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Box</surname>
              <given-names>GEP</given-names>
            </name>
            <name name-style="western">
              <surname>Jenkins</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Reinsel</surname>
              <given-names>GC</given-names>
            </name>
          </person-group>
          <source>Time series analysis: forecasting and control</source>
          <year>2008</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hyndman</surname>
              <given-names>Rob J</given-names>
            </name>
            <name name-style="western">
              <surname>Khandakar</surname>
              <given-names>Yeasmin</given-names>
            </name>
          </person-group>
          <article-title>Automatic Time Series Forecasting: The forecast Package for R</article-title>
          <source>Journal of Statistical Software</source>
          <year>2008</year>
          <volume>27</volume>
          <issue>3</issue>
          <comment>
            <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://webdoc.sub.gwdg.de/ebook/serien/e/monash_univ/wp6-07.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hyndman</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Khandakar</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <source>No 6/07 2007. Monash University, Department of Econometrics and Business Statistics</source>
          <year>2007</year>
          <access-date>2015-05-19</access-date>
          <comment>Automatic time series forecasting: The forecast package for R
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://webdoc.sub.gwdg.de/ebook/serien/e/monash_univ/wp6-07.pdf">http://webdoc.sub.gwdg.de/ebook/serien/e/monash_univ/wp6-07.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6Yeg6094h"/></comment>
          <!-- <pub-id pub-id-type="other">6Yeg6094h</pub-id> -->
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The Parable of Google Flu: Traps in Big Data Analysis</article-title>
          <source>Science</source>
          <year>2014</year>
          <month>03</month>
          <pub-id pub-id-type="doi">10.1126/science.1248506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>David Andre</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>Michael J</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>Mark</given-names>
            </name>
          </person-group>
          <article-title>Twitter: big data opportunities</article-title>
          <source>Science</source>
          <year>2014</year>
          <month>07</month>
          <day>11</day>
          <volume>345</volume>
          <issue>6193</issue>
          <fpage>148</fpage>
          <pub-id pub-id-type="doi">10.1126/science.345.6193.148-a</pub-id>
          <pub-id pub-id-type="medline">25013052</pub-id>
          <pub-id pub-id-type="pii">345/6193/148-a</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>