<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i3e18281</article-id>
      <article-id pub-id-type="pmid">32940617</article-id>
      <article-id pub-id-type="doi">10.2196/18281</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Potential Early Identification of a Large Campylobacter Outbreak Using Alternative Surveillance Data Sources: Autoregressive Modelling and Spatiotemporal Clustering</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Yiding</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aminbeidokhti</surname>
            <given-names>Amirhossein</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Adnan</surname>
            <given-names>Mehnaz</given-names>
          </name>
          <degrees>BEng, MS, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Environmental Science and Research</institution>
            <addr-line>Kenepuru Science Centre</addr-line>
            <addr-line>Porirua, 5022</addr-line>
            <country>New Zealand</country>
            <phone>64 274044941</phone>
            <email>mehnaz.adnan@esr.cri.nz</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4810-0452</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>Xiaoying</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6326-7947</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Bai</surname>
            <given-names>Xiaohan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1213-1107</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Newbern</surname>
            <given-names>Elizabeth</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8486-6471</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Sherwood</surname>
            <given-names>Jill</given-names>
          </name>
          <degrees>MBChB, MPHTM</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0853-4851</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Jones</surname>
            <given-names>Nicholas</given-names>
          </name>
          <degrees>MBChB, FNZCPHM</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5964-8037</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Baker</surname>
            <given-names>Michael</given-names>
          </name>
          <degrees>MBChB, FNZCPHM</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1865-1536</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wood</surname>
            <given-names>Tim</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8712-8335</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>Wei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2028-2407</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Institute of Environmental Science and Research</institution>
        <addr-line>Porirua</addr-line>
        <country>New Zealand</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Victoria University of Wellington</institution>
        <addr-line>Wellington</addr-line>
        <country>New Zealand</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Hawke's Bay District Health Board</institution>
        <addr-line>Hawke's Bay</addr-line>
        <country>New Zealand</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>University of Otago</institution>
        <addr-line>Wellington</addr-line>
        <country>New Zealand</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Mehnaz Adnan <email>mehnaz.adnan@esr.cri.nz</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jul-Sep</season>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>9</month>
        <year>2020</year>
      </pub-date>
      <volume>6</volume>
      <issue>3</issue>
      <elocation-id>e18281</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>2</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>10</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>13</day>
          <month>6</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Mehnaz Adnan, Xiaoying Gao, Xiaohan Bai, Elizabeth Newbern, Jill Sherwood, Nicholas Jones, Michael Baker, Tim Wood, Wei Gao. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 17.09.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://publichealth.jmir.org/2020/3/e18281/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Over one-third of the population of Havelock North, New Zealand, approximately 5500 people, were estimated to have been affected by campylobacteriosis in a large waterborne outbreak. Cases reported through the notifiable disease surveillance system (notified case reports) are inevitably delayed by several days, resulting in slowed outbreak recognition and delayed control measures. Early outbreak detection and magnitude prediction are critical to outbreak control. It is therefore important to consider alternative surveillance data sources and evaluate their potential for recognizing outbreaks at the earliest possible time.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The first objective of this study is to compare and validate the selection of alternative data sources (general practice consultations, consumer helpline, Google Trends, Twitter microblogs, and school absenteeism) for their temporal predictive strength for Campylobacter cases during the Havelock North outbreak. The second objective is to examine spatiotemporal clustering of data from alternative sources to assess the size and geographic extent of the outbreak and to support efforts to attribute its source.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We combined measures derived from alternative data sources during the 2016 Havelock North campylobacteriosis outbreak with notified case report counts to predict suspected daily Campylobacter case counts up to 5 days before cases were reported in the disease surveillance system. Spatiotemporal clustering of the data was analyzed using Local Moran’s I statistics to investigate the extent of the outbreak in both space and time within the affected area.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Models that combined consumer helpline data with autoregressive notified case counts had the best out-of-sample predictive accuracy for 1 and 2 days ahead of notified case reports. Models using Google Trends and Twitter typically performed the best 3 and 4 days before case notifications. Spatiotemporal clusters showed spikes in school absenteeism and consumer helpline inquiries that preceded the notified cases in the city primarily affected by the outbreak.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Alternative data sources can provide earlier indications of a large gastroenteritis outbreak compared with conventional case notifications. Spatiotemporal analysis can assist in refining the geographical focus of an outbreak and can potentially support public health source attribution efforts. Further work is required to assess the place of such surveillance data sources and methods in routine public health practice.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>Campylobacter</kwd>
        <kwd>disease outbreaks</kwd>
        <kwd>forecasting</kwd>
        <kwd>spatio-temporal analysis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In August 2016, Havelock North, one of the 5 cities in the Hawke’s Bay region, New Zealand, was the site of a large waterborne outbreak of Campylobacter infection. This outbreak began on August 8, but a large number of cases were not known to the national notifiable disease surveillance system until August 14. By that time, more than a third of Havelock North residents had been infected with Campylobacter. This event led to serious interruption of daily life in the area and large economic costs [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>].</p>
        <p>The surveillance for notifiable diseases in New Zealand is predominantly passive, with laboratories and physicians notifying their local public health service through submission to the national notifiable disease surveillance system, EpiSurv [<xref ref-type="bibr" rid="ref3">3</xref>]. There are inevitable delays from when people are exposed to an outbreak source (in this outbreak, the source was contaminated drinking water) to when they become ill, seek medical care, are diagnosed, and then notified to health authorities. There are usually further delays before an outbreak is recognized, investigated, and controlled. Therefore, notifiable disease reports are after the fact, and the information is typically delayed due to systematic information flow through traditional channels, for example, from physicians and laboratories.</p>
        <p>Interest in considering alternative data sources for early prediction of such outbreaks was motivated by previously published work reporting on the use of data from internet search engines [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>], crowd-sourced participatory disease surveillance systems [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], Twitter microblogs [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], news stories [<xref ref-type="bibr" rid="ref12">12</xref>], school absenteeism reports [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], general practice (GP) consultations [<xref ref-type="bibr" rid="ref15">15</xref>], consumer helpline calls [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>], bank transactions [<xref ref-type="bibr" rid="ref18">18</xref>], and numerous other sources. Location-aware applications have also been exploited for public and environmental health surveillance and crisis management [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>] or to provide situational awareness and forecasting for disease outbreaks at the local level [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>This study revisits the Havelock North Campylobacter outbreak to examine signals present in data sources that were not available to the public health team during the response. By analyzing temporal and spatiotemporal patterns in these alternative data sources, the study assesses the relative effectiveness and sensitivity of different data sources in detecting the outbreak earlier. First, we aim to assess the temporal predictive strength of modeled combinations of measures from the following daily alternative data sources: GP consultations, consumer health helpline calls, Google Trends, Twitter microblogs, and school absenteeism records. These models will be measured by the time gained (up to 5 days ahead) compared with the cases notified in the existing disease surveillance system, using multiple evaluation metrics. Second, we will examine city-level spatiotemporal patterns in measures from alternative data sources relative to notified case counts to identify clusters and outliers in both space and time over the outbreak period.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics</title>
        <p>The study protocol was approved by the Health and Disability Ethics Committee, New Zealand, under the protocol number NZ/1/6350114. The Twitter data used in this study were obtained under the Twitter terms and conditions and in agreement with its public privacy settings.</p>
      </sec>
      <sec>
        <title>Data Collection and Management</title>
        <p>For the greater area affected by the outbreak (Hawke’s Bay), we collected daily data for the entire 2016 calendar year from the data sources described in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Description of data sources used in analysis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="280"/>
            <col width="280"/>
            <col width="120"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Source</td>
                <td>Fields of interest</td>
                <td>Data level used in analysis</td>
                <td>Counts</td>
                <td>References</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Notified case count (New Zealand surveillance database EpiSurv)</td>
                <td>Date of onset, testing, and notification for confirmed and probable cases of campylobacteriosis</td>
                <td>Aggregated by notification date and city of residence in Hawkes Bay</td>
                <td>1345</td>
                <td>Ministry of Health New Zealand [<xref ref-type="bibr" rid="ref3">3</xref>]</td>
              </tr>
              <tr valign="top">
                <td>General practice consultations (HealthStat)</td>
                <td>Visits for gastrointestinal complaints</td>
                <td>Individual with visit date, age, and sex, for entire Hawkes Bay District Health Board area only</td>
                <td>772</td>
                <td>Cumming J and Gribben B [<xref ref-type="bibr" rid="ref21">21</xref>]</td>
              </tr>
              <tr valign="top">
                <td>Consumer helpline (HealthLine) calls</td>
                <td>Consumer calls concerning gastrointestinal complaints</td>
                <td>Individual with call date, age, sex, and residential city in Hawkes Bay</td>
                <td>1196</td>
                <td>St George IM and Cullen MJ [<xref ref-type="bibr" rid="ref22">22</xref>]</td>
              </tr>
              <tr valign="top">
                <td>Google Trends</td>
                <td>User queries with keywords for gastrointestinal complaints</td>
                <td>Normalized counts aggregated by date, query keyword, and Google Trends normalized count for entire Hawkes Bay District Health Board area only</td>
                <td>Not applicable</td>
                <td>Google Trends [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
              </tr>
              <tr valign="top">
                <td>Twitter microblogs (from Gnip Historical PowerTrack service)</td>
                <td>Tweets with keywords for gastrointestinal complaints</td>
                <td>Individual tweets geocoded to cities in Hawkes Bay</td>
                <td>191</td>
                <td>Gnip [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
              </tr>
              <tr valign="top">
                <td>School absenteeism records (from individual schools)</td>
                <td>Absence owing to illness or any valid reason</td>
                <td>Aggregated by schools for the 5 schools providing data, areas represented: Havelock North, Napier, and Hastings</td>
                <td>23,836</td>
                <td>Ministry of Education, New Zealand [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <sec>
          <title>Notified Case Count</title>
          <p>We extracted confirmed and probable cases of campylobacteriosis in Hawke’s Bay from EpiSurv [<xref ref-type="bibr" rid="ref3">3</xref>] and aggregated them by report date and city-level locations. EpiSurv is the core surveillance system used for monitoring the occurrence of notifiable infectious diseases such as campylobacteriosis and detecting increases that may indicate an outbreak in New Zealand [<xref ref-type="bibr" rid="ref26">26</xref>]. We refer to these data as <italic>notified case counts</italic> and use them as the main comparator for assessing the potential value of alternative surveillance data sources.</p>
        </sec>
        <sec>
          <title>GP Consultations</title>
          <p>Daily data on consultations with GPs were collected through HealthStat. This system automatically monitors the number of people who consult primary care medical practitioners based on automated extracts of GP-coded data from computerized practice management systems [<xref ref-type="bibr" rid="ref21">21</xref>]. The data we used were the daily counts of those who consulted for gastroenteritis.</p>
        </sec>
        <sec>
          <title>Consumer Helpline Calls</title>
          <p>Consumer helpline data were collected from HealthLine, which is a free national 24-hour 0800 telephone health advice service funded by the New Zealand Ministry of Health [<xref ref-type="bibr" rid="ref22">22</xref>]. Calls made to HealthLine are triaged using electronic clinical decision support software. The data collected are a daily count and the city-level location of all phone calls made to HealthLine by people reporting symptoms of gastrointestinal illness. A list of the symptoms used is included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
        <sec>
          <title>Google Trends</title>
          <p>Google Trends provides a time series index of the volume of queries users enter into Google in a given geographic area [<xref ref-type="bibr" rid="ref23">23</xref>]. We collected daily Google Trends data for a range of keywords that could be used to search for information regarding any gastrointestinal illness (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for a list of keywords). These Google Trends data were downloaded within a single day, as Google varies the signal display over time. Google Trends data for the selected keywords were assessed for correlation and cross correlation with the notified case counts for up to 10 previous days, and those keywords with correlations over 0.03 were chosen for the further analysis: “campylobacter,” “diarrhoea,” “diarrhea,” “gastro,” “gastroenteritis,” “puke,” and “vomiting.” Pearson correlation and cross correlation (same day and lagged) of these keywords in Google Trends with notified case counts of campylobacteriosis (January 2016 to July 2016) are presented in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        </sec>
        <sec>
          <title>Twitter Microblogs</title>
          <p>Twitter is a free social networking and microblogging service that enables millions of users to send and read each other's tweets, or short, 140-character messages. Registered users collectively send more than 200 million tweets a day. Twitter accounts are by default public and visible to all (even to unregistered visitors using the Twitter website). Users can restrict their account settings to private, in which case their contents can only be visible to approved followers.</p>
          <p>In a previous study, we obtained Twitter data from Gnip, Twitter’s licensed data provider, through their Historical PowerTrack service [<xref ref-type="bibr" rid="ref24">24</xref>]. In contrast to the publicly available Twitter data stream (Twitter application programming interface), which provides approximately 1% of all real-time tweets, the Historical PowerTrack provides search access to 100% of all publicly available tweets as well as metadata associated with each tweet. Tweets generated between April 2012 and March 2017 were collected from PowerTrack. They contained one or more gastrointestinal-related keywords and were assigned a country code of New Zealand in the Tweet or in the user profile location. The Gnip Query to collect Twitter data is included in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>. A total of 131,843 records were obtained. These data were first geocoded using the latitude and longitude of the tweet. If the tweet location was missing, the profile latitude and longitude were used.</p>
          <p>Twitter feeds were classified by developing a supervised machine learning classifier using the Naïve Bayes algorithm in Python. A total of 10,000 random tweets were manually labeled as (1) gastrointestinal illness, (2) other infectious illness, and (3) irrelevant tweets. A tweet was labeled “gastrointestinal illness” when its content described a recent account of infectious gastrointestinal illness, “other infectious illness” for tweets that described a recent account of other infectious illnesses, and “irrelevant” for tweets that did not fit in the other 2 categories. This training set was used to train the machine learning classifier, which was then used to classify the complete Twitter data. This classifier was evaluated on 1000 randomly selected and manually labeled tweets that were not included in the training set. Precision, recall, and F1 scores were calculated to evaluate the performance of the classifier. Precision is the ratio of observations judged relevant to the total observations predicted as relevant, recall is the ratio of observations judged relevant out of total relevant observations, and F1 is the weighted average of precision and recall [<xref ref-type="bibr" rid="ref27">27</xref>]. The classification method obtained a precision of 0.813, recall of 0.803, and F1 score of 0.804. We applied this developed supervised classifier to the data from the Hawke’s Bay region for the period of January 1, 2016, to December 31, 2016.</p>
        </sec>
        <sec>
          <title>School Absenteeism</title>
          <p>We collected school absenteeism data from 5 schools in Hawke’s Bay: 2 from Havelock North, 2 from Hastings, and 1 from Napier. These included 4 primary schools and 1 secondary school. Primary school data had a reason for absence code, so we included data for codes related to illness and/or any justified absence. Absenteeism codes are listed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. For the secondary school, all absenteeism counts were included without any subcoding. Havelock North and Hastings were the areas primarily affected by the outbreak, whereas the Napier school served as a control.</p>
          <p>A daily time series with cumulative counts from all the previously mentioned data sources was constructed. For the school data set, days covering the school holidays were removed from the analysis. In all data sources, missing data values were estimated by interpolation of observational data. These adjustments were made to reduce the impact of missing data in the analysis.</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <sec>
          <title>Correlation and Cross Correlation</title>
          <p>To assess whether the selected data sources could have predicted this Campylobacter outbreak earlier, we used Pearson correlation statistics to calculate correlations between daily counts of these alternative surveillance measures and daily counts of notified cases. Correlations were calculated for the notified case count with the alternative measure on the same day as well as with up to a 10-day negative lag for each alternative measure (ie, correlating the notified case count on day t with the alternative measure on day t−10, t−9, etc; <xref ref-type="table" rid="table2">Table 2</xref>). Using this method, a significant correlation with the count on the same day indicates that the peak occurs at the same time [<xref ref-type="bibr" rid="ref28">28</xref>], and the cross correlation at a specific lag of <italic>x</italic> days indicates that the peak in the alternative measure occurs <italic>x</italic> days before the peak in notified cases.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Correlation and lagged transformed correlation of alternative predictors with notified case counts of campylobacteriosis.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="160"/>
              <col width="70"/>
              <col width="70"/>
              <col width="70"/>
              <col width="70"/>
              <col width="80"/>
              <col width="80"/>
              <col width="80"/>
              <col width="80"/>
              <col width="80"/>
              <col width="80"/>
              <col width="80"/>
              <thead>
                <tr valign="top">
                  <td>Data source</td>
                  <td colspan="11">Number of days that alternative measures are lagged before notifiable counts</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>0 days</td>
                  <td>−1 day</td>
                  <td>−2 days</td>
                  <td>−3 days</td>
                  <td>−4 days</td>
                  <td>−5 days</td>
                  <td>−6 days</td>
                  <td>−7 days</td>
                  <td>−8 days</td>
                  <td>−9 days</td>
                  <td>−10 days</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>GP<sup>a</sup> consultations</td>
                  <td>0.5<sup>b</sup></td>
                  <td>0.43<sup>b</sup></td>
                  <td>0.39<sup>b</sup></td>
                  <td>0.26<sup>b</sup></td>
                  <td>0.17<sup>b</sup></td>
                  <td>0.14<sup>b</sup></td>
                  <td>0.09</td>
                  <td>0.05</td>
                  <td>0.04</td>
                  <td>0.01</td>
                  <td>0.01</td>
                </tr>
                <tr valign="top">
                  <td>Consumer helpline</td>
                  <td>0.44<sup>b</sup></td>
                  <td>0.59<sup>b</sup></td>
                  <td>0.67<sup>b</sup></td>
                  <td>0.64<sup>b</sup></td>
                  <td>0.55<sup>b</sup></td>
                  <td>0.37<sup>b</sup></td>
                  <td>0.2<sup>b</sup></td>
                  <td>0.12<sup>b</sup></td>
                  <td>0.1</td>
                  <td>0.07</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Google Trends</td>
                  <td>0.13<sup>b</sup></td>
                  <td>0.16<sup>b</sup></td>
                  <td>0.22<sup>b</sup></td>
                  <td>0.22<sup>b</sup></td>
                  <td>0.21<sup>b</sup></td>
                  <td>0.17<sup>b</sup></td>
                  <td>0.21<sup>b</sup></td>
                  <td>0.21<sup>b</sup></td>
                  <td>0.16<sup>b</sup></td>
                  <td>0.08</td>
                  <td>0.02</td>
                </tr>
                <tr valign="top">
                  <td>Twitter microblogs</td>
                  <td>0.11<sup>b</sup></td>
                  <td>0.21<sup>b</sup></td>
                  <td>0.31<sup>b</sup></td>
                  <td>0.25<sup>b</sup></td>
                  <td>0.21<sup>b</sup></td>
                  <td>0.07</td>
                  <td>0</td>
                  <td>−0.01</td>
                  <td>0</td>
                  <td>−0.03</td>
                  <td>0</td>
                </tr>
                <tr valign="top">
                  <td>School absenteeism</td>
                  <td>0.3<sup>b</sup></td>
                  <td>0.48<sup>b</sup></td>
                  <td>0.64<sup>b</sup></td>
                  <td>0.7<sup>b</sup></td>
                  <td>0.52<sup>b</sup></td>
                  <td>0.35<sup>b</sup></td>
                  <td>0.21<sup>b</sup></td>
                  <td>0.2<sup>b</sup></td>
                  <td>0.17<sup>b</sup></td>
                  <td>0.18<sup>b</sup></td>
                  <td>0.15<sup>b</sup></td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>GP: general practice.</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>Statistically significant correlation coefficient &gt;0.1.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Models</title>
          <p>To forecast daily suspected cases of campylobacteriosis, a collection of multivariable autoregressive integrated moving average (ARIMA) models were constructed. These models were found to be a good tool for the prediction of communicable disease incidences [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. These models are denoted as ARIMA(p,d,q), where parameters p, d, and q are non-negative integers; p is the number of autoregressive terms, d is the degree of differencing needed for stationarity, and q is the moving average component of the model. Data from January 1 to July 31, 2016, were used for model development. Model identification for ARIMA was initiated using the R statistical function auto.arima, which uses the Bayesian information criterion to determine the orders p and q and the Phillips-Perron unit root test for determining the order d.</p>
          <p>These models used the negative lagged (day −1 to day −10) daily counts for each alternative measure (<xref ref-type="table" rid="table2">Table 2</xref>) and the nonlagged notified case counts as covariates. We computed various permutations using different combinations of covariates and chose the optimal combination of covariates using the root mean square error (RMSE). The autocorrelation and partial autocorrelation plots of the models obtained from auto.arima were examined to further adjust the range of ARIMA (p and q) parameters. In addition to the models that used the aforementioned data streams as covariates, we built baseline models with only notified case counts for comparison and context. We considered models that only used historical observation of Campylobacter cases to predict cases on the subsequent days and models that incorporated information from the various alternative data streams to compare their predictive abilities during the volatile peak of the outbreak.</p>
          <p>Models were thus evaluated for their predictive performance during the test period from July 31 to August 30, 2016. For each model, we report 3 evaluation metrics: the Pearson correlation (ρ), RMSE, and the relative root mean square error (rRMSE) of the predictions. ρ is a measure of the linear dependence between two variables during a period. RMSE is a measure of the difference between the predicted and true values. rRMSE is a measure of the percent difference between the predicted and true values. The equations for these measures are given below:</p>
          <graphic xlink:href="publichealth_v6i3e18281_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <graphic xlink:href="publichealth_v6i3e18281_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <graphic xlink:href="publichealth_v6i3e18281_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where y<sub>i</sub> denotes the observed value of the notified Campylobacter cases at time t<sub>i</sub>, x<sub>i</sub> denotes the predicted value by any model at time t<sub>i</sub>, <inline-graphic xlink:href="publichealth_v6i3e18281_fig7.png" mimetype="image" xlink:type="simple"/> denotes the mean of the observed values, and <inline-graphic xlink:href="publichealth_v6i3e18281_fig8.png" mimetype="image" xlink:type="simple"/> denotes the mean of the predicted values.</p>
        </sec>
        <sec>
          <title>Spatiotemporal Clustering</title>
          <p>Sources that included city-level locations (notified cases, school absenteeism, consumer helpline, and Twitter feeds) were used for spatiotemporal analysis. To understand the spatial and temporal trends of the event data, we broke them up into a series of time snapshots, using the space-time cube method [<xref ref-type="bibr" rid="ref33">33</xref>]. We applied this method to the data for August 2016 from Havelock North and Hastings, the 2 cities most affected by the outbreak.</p>
          <p>We used a Local Outlier Analysis tool in ArcGIS (Esri) to identify locations that were statistically different from their neighbors in both space and time. This tool generates Anselin Local Moran’s I [<xref ref-type="bibr" rid="ref34">34</xref>] statistics for each space-time window. These statistics have been used for spatial outlier detection in domains such as emergency management [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], epidemiology [<xref ref-type="bibr" rid="ref37">37</xref>], and economics [<xref ref-type="bibr" rid="ref38">38</xref>]. A Local Moran’s I with a negative value (representing high-low or low-high autocorrelation) suggests dissimilarity with neighbors and hence an outlier, a positive value (representing high-high or low-low autocorrelation) suggests similarity with neighbors and hence a cluster, and a zero value suggests randomness. A <italic>P</italic> value less than .05 indicates that the cluster or outlier is statistically significant [<xref ref-type="bibr" rid="ref39">39</xref>]. Twitter was found to be insufficient in terms of spatialized city-level data (with no tweet from Havelock North and only 4 from Hastings during the outbreak period) to generate Local Moran’s I statistics and hence was excluded from this analysis. The analysis was performed using ArcGIS Pro version 2.1.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Relationship Between Notified Cases and Alternative Data</title>
        <p>All alternative surveillance measures correlated significantly with notified Campylobacter cases on the same day. Many of these alternative surveillance measures also demonstrated strong correlations when lagged 1 to 8 days before notified cases. Indeed, the correlation ranged from 0.14 to 0.43 for up to 5 days of lag for GP consultations, 0.12 to 0.67 for up to 7 days of lag for consumer helpline inquiries, 0.16 to 0.22 for up to 8 days of lag for Google Trends, 0.21 to 0.31 for up to 4 days of lag for Twitter, and 0.15 to 0.7 for up to 10 days of lag for school absenteeism (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
      </sec>
      <sec>
        <title>ARIMA Models</title>
        <p>The final ARIMA models and the covariates of alternative data sources with their in-sample error measure of RMSE are summarized in <xref ref-type="table" rid="table3">Table 3</xref>. We found multiple models suitable for prediction: school absenteeism performed best (average RMSE: 1.00) with ARIMA (5,1,3) for forecasting 1 to 3 days ahead and ARIMA (5,0,2) for forecasting 4 to 5 days ahead, followed by Google Trends (average RMSE: 1.07) with ARIMA (2,0,0) for forecasting up to 5 days ahead. GP consultation was found to have an average RMSE of 1.04, with ARIMA (3,0,1) for forecasting for the following day and ARIMA (2,0,0) for forecasting 2 to 5 days ahead. Twitter had an average RMSE of 1.08 and the consumer helpline (HealthLine) had an average RMSE of 1.084 when used as the covariates in the models for predicting notified case counts.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Autoregressive integrated moving average models with time-lagged covariates used with alternative data sources for forecasting 1 to 5 days ahead.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="390"/>
            <col width="0"/>
            <col width="320"/>
            <col width="0"/>
            <col width="170"/>
            <col width="0"/>
            <col width="90"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Alternative data source and forecast step</td>
                <td colspan="2">Time-lagged covariates, days<sup>a</sup></td>
                <td colspan="2">ARIMA<sup>b</sup> order<sup>c</sup></td>
                <td colspan="2">RMSE<sup>d</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>GP<sup>e</sup> consultations</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1 day</td>
                <td colspan="2">1 to 10</td>
                <td colspan="2">3,0,1</td>
                <td>1.01</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">2 days</td>
                <td colspan="2">2 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.04</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3 days</td>
                <td colspan="2">3 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.04</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">4 days</td>
                <td colspan="2">4 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.05</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">5 days</td>
                <td colspan="2">5 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.06</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Consumer helpline</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1 day</td>
                <td colspan="2">1, 2, 3, 4, 5, 6, 7, 8, 10</td>
                <td colspan="2">3,0,2</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">2 days</td>
                <td colspan="2">2, 3, 5, 6, 7, 8, 10</td>
                <td colspan="2">3,0,2</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3 days</td>
                <td colspan="2">3, 4, 5, 6, 7, 8, 10</td>
                <td colspan="2">3,0,2</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">4 days</td>
                <td colspan="2">4, 6, 7, 8, 9, 10</td>
                <td colspan="2">3,0,2</td>
                <td>1.09</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">5 days</td>
                <td colspan="2">6, 7, 8, 9, 10</td>
                <td colspan="2">3,0,2</td>
                <td>1.09</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Google Trends</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1 day</td>
                <td colspan="2">1 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.07</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">2 days</td>
                <td colspan="2">2 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3 days</td>
                <td colspan="2">3 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">4 days</td>
                <td colspan="2">4 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">5 days</td>
                <td colspan="2">5 to 10</td>
                <td colspan="2">2,0,0</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Twitter</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1 day</td>
                <td colspan="2">1 to 10</td>
                <td colspan="2">4,0,1</td>
                <td>1.07</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">2 days</td>
                <td colspan="2">2 to 10</td>
                <td colspan="2">5,0,2</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3 days</td>
                <td colspan="2">3 to 10</td>
                <td colspan="2">3,0,2</td>
                <td>1.08</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">4 days</td>
                <td colspan="2">4 to 10</td>
                <td colspan="2">2,0,2</td>
                <td>1.09</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">5 days</td>
                <td colspan="2">5 to 10</td>
                <td colspan="2">2,0,2</td>
                <td>1.09</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>School absenteeism</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1 day</td>
                <td colspan="2">1 to 10</td>
                <td colspan="2">5,1,3</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">2 days</td>
                <td colspan="2">2 to 10</td>
                <td colspan="2">5,1,3</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3 days</td>
                <td colspan="2">3 to 10</td>
                <td colspan="2">5,1,3</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">4 days</td>
                <td colspan="2">4 to 10</td>
                <td colspan="2">5,0,2</td>
                <td>1.09</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">5 days</td>
                <td colspan="2">5 to 10</td>
                <td colspan="2">5,0,2</td>
                <td>1.09</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Lagged covariates refer to the time-lagged independent variables of alternative data source.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>ARIMA: autoregressive integrated moving average.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>ARIMA order (p,d,q) refers to the number of autoregressive terms, degree of differencing, and moving average components of the model.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>RMSE: root mean square error.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>GP: general practice.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We produced predictions for 1 to 5 days ahead during the outbreak (ie, the testing period) using the models in <xref ref-type="table" rid="table3">Table 3</xref> and with the baseline models that used only autoregressive notified case counts. The daily estimations of the models with autoregressive (AR) information of notified case counts, AR with Google Trends (AR+GT), AR with consumer helpline (AR+CHL), AR with GP consultations (AR+GP), AR with school absenteeism (AR+ABS), and AR with Twitter (AR+Twitter) are presented in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Actual notified case counts and prediction results 1 to 5 days ahead for all developed models, with their prediction errors based on relative root mean square error. The best model performance with the lowest prediction error (relative root mean square error) in each time series is shown as a bold line. ABS: absenteeism; AR: autoregressive; CHL: consumer helpline; GP: general practice; GT: Google Trends.</p>
          </caption>
          <graphic xlink:href="publichealth_v6i3e18281_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref ref-type="table" rid="table4">Table 4</xref> summarizes the predictive performance of the models during the test period for each of the 1-, 2-, 3-, 4-, and 5-day ahead predictions, as captured by the 3 evaluation metrics RMSE, rRMSE, and ρ. Although some models’ predictions showed good correlation with the notified case counts, their predictions showed large discrepancies from the true number of cases reported, as shown by the rRMSE. The rRMSE provides an estimate of the prediction error relative to the number of actual cases reported on each day over the evaluation period, and from our perspective, it provides a better measure of the quality of model prediction given the short time span of the outbreak.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Root mean square error, relative root mean square error, and Pearson correlation for 1-, 2-, 3-, 4-, and 5-day ahead predictions during the test period (August 2016).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="70"/>
            <col width="70"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="0"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="0"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="0"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td colspan="3">1 Day</td>
                <td colspan="4">2 Days</td>
                <td colspan="4">3 Days</td>
                <td colspan="4">4 Days</td>
                <td colspan="3">5 Days</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>RMSE<sup>a</sup></td>
                <td>rRMSE<sup>b</sup></td>
                <td>ρ<sup>c</sup></td>
                <td>RMSE</td>
                <td>rRMSE</td>
                <td>ρ</td>
                <td colspan="2">RMSE</td>
                <td>rRMSE</td>
                <td>ρ</td>
                <td colspan="2">RMSE</td>
                <td>rRMSE</td>
                <td>ρ</td>
                <td colspan="2">RMSE</td>
                <td>rRMSE</td>
                <td>ρ</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td>AR<sup>d</sup></td>
                <td>15.28</td>
                <td>46.9</td>
                <td>0.917</td>
                <td>23.73</td>
                <td>72.8</td>
                <td>0.76</td>
                <td colspan="2">33.9</td>
                <td>105.3</td>
                <td>0.82</td>
                <td colspan="2">38.85</td>
                <td>119.2</td>
                <td>0.20</td>
                <td colspan="2">67.57</td>
                <td>202</td>
                <td>0.65</td>
              </tr>
              <tr valign="bottom">
                <td>AR+CHL<sup>e</sup></td>
                <td>
                  <italic>2.74</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>8.4</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>0.996</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>15.1</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>46.3</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>0.91</italic>
                  <sup>f</sup>
                </td>
                <td colspan="2">39.74</td>
                <td>123.5</td>
                <td>0.79</td>
                <td colspan="2">38.14</td>
                <td>117</td>
                <td>0.28</td>
                <td colspan="2">68.51</td>
                <td>204.8</td>
                <td>0.64</td>
              </tr>
              <tr valign="bottom">
                <td>AR+GP<sup>g</sup></td>
                <td>15.71</td>
                <td>48.2</td>
                <td>0.901</td>
                <td>23.77</td>
                <td>72.9</td>
                <td>0.75</td>
                <td colspan="2">31.55</td>
                <td>98</td>
                <td>0.84</td>
                <td colspan="2">39.59</td>
                <td>121.4</td>
                <td>0.21</td>
                <td colspan="2">63.21</td>
                <td>189</td>
                <td>0.66</td>
              </tr>
              <tr valign="bottom">
                <td>AR+GT<sup>h</sup></td>
                <td>12.9</td>
                <td>39.6</td>
                <td>0.933</td>
                <td>22.5</td>
                <td>69</td>
                <td>0.76</td>
                <td colspan="2">
                  <italic>29.86</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>92.8</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>0.85</italic>
                  <sup>f</sup>
                </td>
                <td colspan="2">37.84</td>
                <td>116.1</td>
                <td>0.21</td>
                <td colspan="2">
                  <italic>62.41</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>186.6</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>0.66</italic>
                  <sup>f</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>AR+Twitter</td>
                <td>11.61</td>
                <td>35.6</td>
                <td>0.951</td>
                <td>22.67</td>
                <td>69.5</td>
                <td>0.80</td>
                <td colspan="2">35.63</td>
                <td>110.7</td>
                <td>0.81</td>
                <td colspan="2">
                  <italic>26.76</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>82.1</italic>
                  <sup>f</sup>
                </td>
                <td>
                  <italic>0.61</italic>
                  <sup>f</sup>
                </td>
                <td colspan="2">80.83</td>
                <td>241.7</td>
                <td>0.62</td>
              </tr>
              <tr valign="bottom">
                <td>AR+ABS<sup>i</sup></td>
                <td>4.74</td>
                <td>14.5</td>
                <td>0.989</td>
                <td>15.97</td>
                <td>49</td>
                <td>0.89</td>
                <td colspan="2">38.68</td>
                <td>120.2</td>
                <td>0.81</td>
                <td colspan="2">47.26</td>
                <td>145</td>
                <td>0.28</td>
                <td colspan="2">71.5</td>
                <td>213.8</td>
                <td>0.65</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>RMSE: root mean square error.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>rRMSE: relative root mean square error.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>ρ: Pearson correlation.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>AR: autoregressive.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>CHL: consumer helpline.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>Best performing model for a particular day on basis of the rRMSE.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>GP: general practice.</p>
            </fn>
            <fn id="table4fn8">
              <p><sup>h</sup>GT: Google Trends.</p>
            </fn>
            <fn id="table4fn9">
              <p><sup>i</sup>ABS: school absenteeism.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As seen in the evaluation metric values in <xref ref-type="table" rid="table4">Table 4</xref>, no model depending on a single data source performed best across all metrics or time periods. On the basis of the rRMSE, models that combined consumer helpline with autoregressive information (AR+CHL) outperformed all other models for 1- and 2-day ahead predictions (rRMSE=8.4 and 46.3, respectively). Meanwhile, models that combined Twitter with autoregressive information from notified cases (AR+Twitter) performed best for 4-day ahead prediction (rRMSE=82.1), and models that combined Google Trends with autoregressive information (AR+GT) performed best for 3- and 5-day ahead predictions (rRMSE=92.8 and 186.6, respectively). In all time periods, the model using only the historical case counts underperformed all the other models.</p>
        <p>The out-of-sample (ie, using the data for the testing period) prediction with the best performing models for the 1-, 2-, 3-, 4-, and 5-day ahead time horizons and their prediction errors are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Across models, prediction accuracy decreased as predictions were made further ahead, resulting in increases in rRMSE (and RMSE) and decreases in model correlations across time horizons. For example, for the best models based on Google Trends, the prediction error nearly doubled from the 3-day to the 5-day forecast.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The daily estimations of the best performing models (lowest relative root mean square error) and their prediction errors during the testing period (August 2016). AR: autoregressive; CHL: consumer helpline; GT: Google Trends.</p>
          </caption>
          <graphic xlink:href="publichealth_v6i3e18281_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Clustering and Cluster Detection</title>
        <p>The summarized cluster types in notified case counts, consumer helpline inquiries, and school absenteeism in Hastings and Havelock North are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. Both notified case counts and consumer helpline inquiries indicated high-low outliers in Hastings and multiple cluster types (ie, high-high, low-low, high-low, and low-high) in Havelock North throughout the time period. The cluster types could not be identified in the Twitter data because of the limited availability of daily records in all 3 cities in the time period.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Cluster types in notified case counts, consumer helpline inquiries, and school absenteeism in Hastings and Havelock North. High-high cluster refers to high values surrounded by high values, high-low cluster refers to high values surrounded by low values, low-high cluster refers to low values surrounded by high values, and low-low cluster refers to low values surrounded by low values. Multiple Types refers to multiple cluster-type designations (ie, high-high, low-low, high-low, and low-high) through the time period.</p>
          </caption>
          <graphic xlink:href="publichealth_v6i3e18281_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The prevalence of the designation Multiple Types did not illuminate trends or clusters in the data set. Therefore, we examined daily Local Moran’s I to compare the clustering between 2 cities during the outbreak (<xref ref-type="table" rid="table5">Table 5</xref>). Comparing the 2 cities, clustering in data sources was very weak in Hastings, compared with Havelock North. On the basis of Local Moran’s I, outliers were found in school absenteeism and consumer helpline (Moran’s I: −0.40 and −0.77, respectively) in Havelock North on August 11, 2016, which continued to grow in size until August 15, 2016. After 3 days, a stronger outlier appeared in the notified case counts (−2.17) from Havelock North. In Hastings, no significant cluster appeared in school absenteeism, a relatively weak cluster appeared in notified case counts, and a consumer helpline outlier appeared on August 14. These data suggest that the spatiotemporal indicators in consumer helpline and school absenteeism indicated the outbreak in Havelock North 3 days earlier than the notified surveillance data.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Daily Local Moran’s I in school absenteeism, consumer helpline inquiries, and notified case counts in Havelock North and Hastings cities in August 2016.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="150"/>
            <col width="150"/>
            <col width="140"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Date</td>
                <td colspan="3">Havelock North</td>
                <td colspan="3">Hastings</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>School absenteeism</td>
                <td>Consumer helpline</td>
                <td>Notified case count</td>
                <td>School absenteeism</td>
                <td>Consumer helpline</td>
                <td>Notified case count</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Moran’s I value, Z score</td>
                <td>Moran’s I value, Z score</td>
                <td>Moran’s I value, Z score</td>
                <td>Moran’s I value, Z score</td>
                <td>Moran’s I value, Z score</td>
                <td>Moran’s I value, Z score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>August 4, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.03 (−0.16)</td>
                <td>0.04 (−0.23)</td>
                <td>0.08 (−0.29)</td>
              </tr>
              <tr valign="top">
                <td>August 5, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.04 (−0.23)</td>
                <td>0.07 (−0.29)</td>
                <td>0.09 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 6, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 7, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 8, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 9, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 10, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.04 (−0.19)</td>
                <td>0.03 (−0.1)</td>
                <td>0.09 (−0.29)</td>
              </tr>
              <tr valign="top">
                <td>August 11, 2016</td>
                <td>−<italic>0.40 (1.74)</italic><sup>a,</sup><sup>b</sup></td>
                <td>−<italic>0.77 (2.71)</italic><sup>a,</sup><sup>b</sup></td>
                <td>0 (0.01)</td>
                <td>0.03 (−0.15)</td>
                <td>0.01 (−0.1)</td>
                <td>0.08 (−0.29)</td>
              </tr>
              <tr valign="top">
                <td>August 12, 2016</td>
                <td>−0.40 (−0.23)</td>
                <td>−0.77 (−0.29)</td>
                <td>0 (−0.32)</td>
                <td>0.04 (−0.23)</td>
                <td>0.03 (−0.29)</td>
                <td>0.09 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 13, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 14, 2016</td>
                <td>−1.62 (7.08)<sup>a</sup></td>
                <td>−1.92 (6.71)<sup>a</sup></td>
                <td>−<italic>2.17 (6.86)</italic><sup>a,</sup><sup>b</sup></td>
                <td><italic>0.04 (−0.16)</italic></td>
                <td>−<italic>0.06 (0.22)</italic><sup>b</sup></td>
                <td>−<italic>0.20 (0.64)</italic><sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>August 15, 2016</td>
                <td>−1.62 (−0.23)</td>
                <td>−1.92 (−0.29)</td>
                <td>−2.17 (−0.32)</td>
                <td>0.03 (−0.17)</td>
                <td>−0.01 (−0.04)</td>
                <td>0.56 (0.89)</td>
              </tr>
              <tr valign="top">
                <td>August 16, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.03 (−0.16)</td>
                <td>0 (−0.04)</td>
                <td>1.20 (1.37)</td>
              </tr>
              <tr valign="top">
                <td>August 17, 2016</td>
                <td>0.05 (0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.02 (−0.15)</td>
                <td>0 (0.03)</td>
                <td>1.20 (0.89)</td>
              </tr>
              <tr valign="top">
                <td>August 18, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.02 (−0.11)</td>
                <td>0 (0.03)</td>
                <td>0.31 (0.35)</td>
              </tr>
              <tr valign="top">
                <td>August 19, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.03 (−0.23)</td>
                <td>−0.01 (−0.29)</td>
                <td>−0.11 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 20, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 21, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.03 (−0.13)</td>
                <td>0.01 (−0.04)</td>
                <td>−0.08 (0.25)</td>
              </tr>
              <tr valign="top">
                <td>August 22, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.02 (−0.17)</td>
                <td>0 (−0.04)</td>
                <td>−0.05 (−0.19)</td>
              </tr>
              <tr valign="top">
                <td>August 23, 2016</td>
                <td>−0.10 (0.45)</td>
                <td>−0.11 (0.37)</td>
                <td>−0.11 (0.34)</td>
                <td>0.03 (−0.18)</td>
                <td>0 (−0.1)</td>
                <td>−0.02 (0.13)</td>
              </tr>
              <tr valign="top">
                <td>August 24, 2016</td>
                <td>0.21 (0.46)</td>
                <td>0.14 (0.37)</td>
                <td>0.12 (0.34)</td>
                <td>0.03 (−0.16)</td>
                <td>0.02 (−0.16)</td>
                <td>−0.03 (−0.23)</td>
              </tr>
              <tr valign="top">
                <td>August 25, 2016</td>
                <td>0.14 (0.3)</td>
                <td>0.14 (0.37)</td>
                <td>0.23 (0.68)</td>
                <td>0.03 (−0.16)</td>
                <td>0.04 (−0.23)</td>
                <td>0.06 (−0.29)</td>
              </tr>
              <tr valign="top">
                <td>August 26, 2016</td>
                <td>−0.07 (−0.23)</td>
                <td>−0.11 (−0.29)</td>
                <td>−0.22 (−0.32)</td>
                <td>0.04 (−0.23)</td>
                <td>0.07 (−0.29)</td>
                <td>0.09 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 27, 2016</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 28, 2016</td>
                <td>−0.05 (0.2)</td>
                <td>−0.01 (0.04)</td>
                <td>−0.11 (0.34)</td>
                <td>0.04 (−0.19)</td>
                <td>0.03 (−0.1)</td>
                <td>0.03 (−0.1)</td>
              </tr>
              <tr valign="top">
                <td>August 29, 2016</td>
                <td>−0.05 (−0.23)</td>
                <td>−0.01 (−0.29)</td>
                <td>−0.11 (−0.32)</td>
                <td>0.04 (−0.23)</td>
                <td>0.03 (−0.29)</td>
                <td>0.03 (−0.32)</td>
              </tr>
              <tr valign="top">
                <td>August 30, 2016</td>
                <td>−0.02 (0.11)</td>
                <td>−0.11 (0.37)</td>
                <td>0.05 (−0.16)</td>
                <td>0.05 (−0.23)</td>
                <td>0.08 (−0.29)</td>
                <td>0.10 (−0.32)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Negative values of the Moran’s I value and corresponding Z scores greater than 1.96 indicate that there is a statistically significant spatial outlier.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>First day when the data source shows a spatial outlier.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The results show that alternative surveillance data sources can be used to predict an increase in notified Campylobacter cases up to 5 days before the outbreak would be detected via the notifiable disease surveillance system. Importantly, models that relied solely on available time-lagged notified case data were found to be no better than the models based on alternative data sources in predicting near–real-time Campylobacter cases. This finding further underscores the need for alternative real-time data sources such as consumer helpline and Google Trends.</p>
        <p>Models that relied on consumer helpline calls provided 1 to 2 days of lead time before an increase in notified cases and consistently performed well, with low error rates. This finding suggests that consumer helpline data have potential utility for earlier detection of outbreaks of acute gastroenteritis. Qualitatively, this result is consistent with our expectations, as the consumer helpline and GP consultations are well-established services for those seeking medical attention in New Zealand [<xref ref-type="bibr" rid="ref22">22</xref>] and can be expected to provide good predictors of potential cases.</p>
        <p>The web data sources (Google Trends and Twitter) were found to be good estimators of Campylobacter cases, even earlier than consumer helpline data. For example, Google Trends reduced the prediction error by less than 6% compared with the next-best model (ie, with GP consultations) for 3-days ahead prediction, as shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <p>As seen in prediction studies for other diseases [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], the quality of predictions decreased as the time horizon of prediction increased. Specifically, for 1-day ahead predictions, we found that the model using consumer helpline combined with autoregressive terms (the AR+CHL model) performed best. The autoregressive terms generally help maintain predictions within a reasonable range, whereas the alternative data sources helped the models to respond more rapidly to sudden changes in the dynamics, a finding that has been documented in previous studies [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. However, for 3- to 5-day ahead predictions, models that used data from Google Trends and Twitter performed best. Google search and Twitter activity appear to respond more rapidly to fluctuations in the dynamics of campylobacteriosis. Evidently, people affected by Campylobacter begin searching for gastrointestinal-related keywords when starting to have symptoms or when they may suspect a risk of exposure. This suggests that monitoring search activity may help track disease incidence.</p>
        <p>Spatiotemporal analysis was also retrospectively able to confirm the area impacted by the outbreak. Havelock North and Hastings followed the same clustering in notified case counts and consumer helpline inquiries, whereas Hastings, which was not in the area most affected by the outbreak, had early peaks in consumer helpline inquiries and school absenteeism but fewer overall helpline calls and cases. Aggregating the time series data at the city level may immediately give indications of potential clusters, such as the one identified in Havelock North by Local Moran’s I statistics. In particular, primary clusters in school absenteeism and consumer helpline inquiries started on August 11, which was 3 days before the same type of cluster was found in notified case counts and a day earlier than actual public health response actions were initiated. Used prospectively, such spatiotemporal analysis could identify clusters and outbreaks earlier in their course than notification data [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are limitations in our approach from inherent biases in the alternative data sources. Users of any of these services are not representative of the general population or those at risk of exposure to pathogens. Google search patterns and care seeking may reflect media coverage and situational awareness rather than the actual impact of the outbreak. Local media in regions with a large outbreak may react differently than the regions where these diseases are fewer in number. Thus, media attention has the potential to dramatically influence our daily predictions [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
        <p>We used the correlation of keywords with notified cases to filter Google Trends data and to classify tweets, which improved the predictive values of these data sources. However, neither of these data sources can distinguish people who search or tweet because of awareness from those with infection. In addition, the static assessment of the predictive power of the included keywords can impose some limitations. Self-correcting keyword selection by dynamically reassessing the predictive power of each input variable, as discussed by McGough et al [<xref ref-type="bibr" rid="ref7">7</xref>], could be used in the future to mitigate these limitations. The terms that peak due to high media attention could thus be excluded from the model if their relationship with case count information has weakened.</p>
        <p>As mentioned in the Results section, there was insufficient Twitter data to use in the spatiotemporal analysis. However, tweets were only queried in English. With an already low tweet volume, capturing other languages such as Māori might be needed to refine models in the future. Furthermore, we relied on Twitter-generated coordinate information to capture local data. To overcome this limitation, future work could explore ways to geocode the data using location information in the tweet text [<xref ref-type="bibr" rid="ref43">43</xref>]. For temporal analysis, only limited Twitter and school absenteeism data were available from the entire Hawke’s Bay region, presenting a clear limitation to the power of the analysis. It is encouraging that despite the limited school absenteeism data, it was still found to show statistically significant spatiotemporal clusters at the city level.</p>
        <p>We are not advocating alternative data sources to replace traditional methods, but rather to complement them. For example, in the Havelock North outbreak, public health officials still required information that suggested an outbreak source (positive bacterial test from local water supply) to start control activities (boil water notice and chlorination of drinking water supply). Early signals from social media and HealthLine calls could have triggered efforts to investigate potential outbreak sources earlier. However, nontraditional surveillance carries with it the workload required to interpret and respond to signals, which can be extensive, as others have noted [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>].</p>
      </sec>
      <sec>
        <title>Comparison With Previous Work</title>
        <p>This study shows a number of improvements over previous methodologies using monthly or weekly data from alternative sources to predict disease incidence in the community [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], notably by using diverse daily data sources and combining with autoregressive modeling and spatiotemporal clustering to predict the incidence of gastrointestinal illness in a localized outbreak. Many researchers have used internet search queries to build prediction models in recent years. Bahk et al [<xref ref-type="bibr" rid="ref6">6</xref>] used internet search query data for predicting weekly foodborne illness up to 2 months ahead of increases. Liu et al [<xref ref-type="bibr" rid="ref4">4</xref>] used internet queries to predict weekly dengue fever outbreaks. Both of these analyses used Spearman r correlation to quantify the strength of associations between disease incidence and internet search queries. Similar to our study, Bahk et al [<xref ref-type="bibr" rid="ref6">6</xref>] used the seasonal autoregressive integrated moving average (SARIMA) to develop their predictive models. However, Liu et al [<xref ref-type="bibr" rid="ref4">4</xref>] used regression tree models to assess the threshold effects between the weekly disease incidence and internet search queries. Their results are consistent with those in this study, finding that internet search query data provided a timely data source for predicting the incidence of disease.</p>
        <p>In addition to internet search volumes, some studies have used time-lagged data from Twitter to predict the incidence of diseases such as Zika [<xref ref-type="bibr" rid="ref7">7</xref>] and influenza-like illness [<xref ref-type="bibr" rid="ref5">5</xref>]. As in our study, McGough et al [<xref ref-type="bibr" rid="ref7">7</xref>] used ARIMA and rRMSE to select the best model and found that Google typically performed better than Twitter for 2- and 3-week ahead predictions. However, rather than using static keywords, their study used a dynamic keyword selection method. Nagar et al [<xref ref-type="bibr" rid="ref5">5</xref>] used an Engle-Granger cointegration test to make weekly predictions of influenza-like illness from time-lagged data sets containing Google, Twitter, and notified case counts. However, their study found that Twitter data produced better predictions than Google Trends data. Both of these studies found that time-lagged notified case data were not statistically significant in predicting cases in real time, in line with the results found in our study. In addition to regression models, Nagar et al [<xref ref-type="bibr" rid="ref5">5</xref>] also used a spatial scan technique to identify areas with relatively higher risk of disease, comparable with the outlier analysis using Local Moran’s I, which we used to identify spatial outliers.</p>
        <p>Dong et al [<xref ref-type="bibr" rid="ref14">14</xref>] used diverse data sources including over-the-counter drug sales, search queries, and school absenteeism to estimate the correlation of these data sources with influenza activity. As in our study, they found that 1-week lagged data of internet search queries and school absenteeism showed the strongest correlation with laboratory-confirmed cases. However, they did not attempt to estimate the activity of disease in the community ahead of time. Widerström et al [<xref ref-type="bibr" rid="ref17">17</xref>] used consumer helpline data and applied SARIMA to develop weekly predictive models for acute gastrointestinal illness and influenza-like illness. As in our study, consumer helpline data proved to be an important source for the early detection of outbreaks of these conditions. Wang et al [<xref ref-type="bibr" rid="ref18">18</xref>] suggested the possibility of using bank transaction data with a simple moving average to monitor post outbreak disease spread, and they gave the Havelock North outbreak as an example; however, the use of such data for early warning of the outbreak was not very encouraging.</p>
      </sec>
      <sec>
        <title>Implications and Further Research</title>
        <p>This study has further demonstrated that alternative surveillance data sources can identify large outbreaks of gastrointestinal illness a few days earlier than traditional surveillance methods. The lead time gained depends on the nontraditional surveillance data source used, with onset of symptoms quickly stimulating Google and Twitter activity followed soon after by calls to consumer health helplines, days off from school, and GP consultations.</p>
        <p>Such alternative data sources also need to be combined with suitable analytic methods that can be run routinely and easily to identify potential outbreaks, so they can be further investigated and acted on if control measures are needed. This research has identified models with autoregressive information as promising approaches for the analysis of a set of alternative data sources. However, for waterborne outbreaks, as in Havelock North, measures from drinking water supply and weather conditions could be included as further data sources for disease surveillance.</p>
        <p>This study used the traditional ARIMA models to assess the efficiency of using alternative data sources for the early prediction of a large Campylobacter outbreak. The development of further machine learning models using other techniques to validate the results of this study will be useful. For example, deep learning–based algorithms have been found to increase the performance of traditional time series forecasting methods [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <p>The Havelock North outbreak was very large. The signal produced in data sources was therefore easier to detect than would be the case in a smaller outbreak where the signal-to-noise ratio would be lower. It would be useful to repeat the study with outbreaks of smaller magnitude and in different settings to determine whether similar findings apply.</p>
        <p>There are multiple operational questions that would need to be resolved before any of the methods identified here could be introduced for routine use by public health agencies in New Zealand or elsewhere. In particular, it is important to identify the range of conditions or syndromes where early detection is important for guiding effective public health action. It is also important to consider the volume of false positives that might be generated and the additional resources required to investigate and rule them out. The range of surveillance modalities also needs to be considered. For example, specific forms of environmental surveillance may be more effective for guiding public health action; improved surveillance of drinking water quality and meteorological data may be more effective in preventing disease than focusing on early indicators of illness. Resource issues will also need to be considered, which might favor systems that are already operating on a real-time basis (eg, consumer calls to HealthLine).</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study presents several important conclusions. We tested the use of data from alternative sources in predictive models and showed that they could have provided earlier detection of the Havelock North outbreak. Given the need for early intervention to curb disease transmission, our model predictions could fill a critical time gap in existing surveillance based on notification of cases of disease. These notifications inevitably do not appear until a few days after the occurrence of a communicable disease outbreak. Our results show that models that combine consumer helpline data with autoregressive information of notified case counts performed best for predictions 1 and 2 days ahead, whereas models using Google and Twitter data performed best for predictions 3 and 4 days ahead, although with lower prediction accuracy. Spatiotemporal clusters showed an earlier spike in school absenteeism and consumer helpline inquiries when compared with the notified case counts in the city primarily affected by the outbreak, which suggests that spatiotemporal modeling of alternative data sources could help to identify and locate outbreaks earlier in their development. The methods presented here can potentially be expanded to other regions in the country to signal changes in disease incidence for public health decision makers. However, before doing that, a number of key questions will need to be systematically investigated to establish the practical role of these methods and how they could be most effectively integrated into routine public health practice.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Symptoms classified as gastrointestinal illness in HealthLine calls.</p>
        <media xlink:href="publichealth_v6i3e18281_app1.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Keywords used to collect Google Trends data.</p>
        <media xlink:href="publichealth_v6i3e18281_app2.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Correlation and cross correlation of key words in Google Trends with the notified case counts.</p>
        <media xlink:href="publichealth_v6i3e18281_app3.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Gnip Query to collect Twitter data.</p>
        <media xlink:href="publichealth_v6i3e18281_app4.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Codes used to collect absenteeism data from primary schools.</p>
        <media xlink:href="publichealth_v6i3e18281_app5.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AR</term>
          <def>
            <p>autoregressive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ARIMA</term>
          <def>
            <p>autoregressive integrated moving average</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CHL</term>
          <def>
            <p>consumer helpline</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">GP</term>
          <def>
            <p>general practice</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GT</term>
          <def>
            <p>Google Trends</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RMSE</term>
          <def>
            <p>root mean square error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">rRMSE</term>
          <def>
            <p>relative root mean square error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SARIMA</term>
          <def>
            <p>seasonal autoregressive integrated moving average</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Health Research Council, New Zealand. The authors gratefully acknowledge the help of the Ministry of Health, New Zealand; participating schools; and Hawke’s Bay District Health Board staff in making available the Twitter and school absenteeism data essential to this research.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Drew</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rippon</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The Economic Costs of the Havelock North August 2016 Waterborne Disease Outbreak</article-title>
          <source>Sapere Research Group Limited</source>
          <year>2017</year>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.health.govt.nz/system/files/documents/publications/havelock_north_outbreak_costing_final_report_-_august_2017.pdf">https://www.health.govt.nz/system/files/documents/publications/havelock_north_outbreak_costing_final_report_-_august_2017.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Report of the Havelock North Drinking Water Inquiry: Stage 1</article-title>
          <source>Internal Affairs</source>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.dia.govt.nz/vwluResources/Report-Havelock-North-Water-Inquiry-Stage-1/$file/Report-Havelock-North-Water-Inquiry-Stage-1.pdf">https://www.dia.govt.nz/vwluResources/Report-Havelock-North-Water-Inquiry-Stage-1/$file/Report-Havelock-North-Water-Inquiry-Stage-1.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Ministry of Health New Zealand</collab>
          </person-group>
          <article-title>A Secure Web-based Application Based on the Surveillance Information New Zealand (Survinz) Architecture</article-title>
          <source>Public Health Surveillance</source>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://surv.esr.cri.nz/episurv/index.php">https://surv.esr.cri.nz/episurv/index.php</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Milinovich</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jing</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using Baidu search index to predict dengue outbreak in China</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>12</month>
          <day>1</day>
          <volume>6</volume>
          <fpage>38040</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/srep38040"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep38040</pub-id>
          <pub-id pub-id-type="medline">27905501</pub-id>
          <pub-id pub-id-type="pii">srep38040</pub-id>
          <pub-id pub-id-type="pmcid">PMC5131307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nagar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Freifeld</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nojima</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chunara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>A case study of the New York City 2012-2013 influenza season with daily geocoded Twitter data from temporal and spatiotemporal perspectives</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>10</month>
          <day>20</day>
          <volume>16</volume>
          <issue>10</issue>
          <fpage>e236</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/10/e236/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3416</pub-id>
          <pub-id pub-id-type="medline">25331122</pub-id>
          <pub-id pub-id-type="pii">v16i10e236</pub-id>
          <pub-id pub-id-type="pmcid">PMC4259880</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bahk</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Use of internet search queries to enhance surveillance of foodborne illness</article-title>
          <source>Emerg Infect Dis</source>
          <year>2015</year>
          <month>11</month>
          <volume>21</volume>
          <issue>11</issue>
          <fpage>1906</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.3201/eid2111.141834"/>
          </comment>
          <pub-id pub-id-type="doi">10.3201/eid2111.141834</pub-id>
          <pub-id pub-id-type="medline">26485066</pub-id>
          <pub-id pub-id-type="pmcid">PMC4622232</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Forecasting Zika incidence in the 2016 Latin America outbreak combining traditional disease surveillance with search, social media, and news report data</article-title>
          <source>PLoS Negl Trop Dis</source>
          <year>2017</year>
          <month>01</month>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e0005295</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pntd.0005295"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pntd.0005295</pub-id>
          <pub-id pub-id-type="medline">28085877</pub-id>
          <pub-id pub-id-type="pii">PNTD-D-16-01733</pub-id>
          <pub-id pub-id-type="pmcid">PMC5268704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mansour</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Choucair</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nissen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatt</surname>
              <given-names>J</given-names>
            </name>
            <collab>Centers for Disease Control and Prevention</collab>
          </person-group>
          <article-title>Health department use of social media to identify foodborne illness - Chicago, Illinois, 2013-2014</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2014</year>
          <month>08</month>
          <day>15</day>
          <volume>63</volume>
          <issue>32</issue>
          <fpage>681</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/mm6332a1.htm"/>
          </comment>
          <pub-id pub-id-type="medline">25121710</pub-id>
          <pub-id pub-id-type="pii">mm6332a1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4584908</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chunara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Monitoring influenza epidemics in China with search query from Baidu</article-title>
          <source>PLoS ONE</source>
          <year>2013</year>
          <month>05</month>
          <day>30</day>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>e64323</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0064323</pub-id>
          <pub-id pub-id-type="medline">23750192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhary</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Real-time Disease Surveillance Using Twitter Data: Demonstration on Flu and Cancer</article-title>
          <source>Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining</source>
          <year>2013</year>
          <conf-name>KDD'13</conf-name>
          <conf-date>August 11-13, 2013</conf-date>
          <conf-loc>Washington, DC, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2487575.2487709</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Widener</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Using geolocated Twitter data to monitor the prevalence of healthy and unhealthy food references across the US</article-title>
          <source>Appl Geogr</source>
          <year>2014</year>
          <month>10</month>
          <volume>54</volume>
          <fpage>189</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1016/j.apgeog.2014.07.017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mawudeku</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blench</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Global Public Health Intelligence Network</article-title>
          <source>World Health Organization</source>
          <year>2006</year>
          <access-date>2020-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/docs/default-source/eios-gtm-2019-presentations/tanguay-phac---eios-gtm-2019.pdf?sfvrsn=8c758734_2">https://www.who.int/docs/default-source/eios-gtm-2019-presentations/tanguay-phac---eios-gtm-2019.pdf?sfvrsn=8c758734_2</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>XX</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Diwan</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Applying the zero-inflated Poisson model with random effects to detect abnormal rises in school absenteeism indicating infectious diseases outbreak</article-title>
          <source>Epidemiol Infect</source>
          <year>2018</year>
          <month>09</month>
          <volume>146</volume>
          <issue>12</issue>
          <fpage>1565</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1017/S095026881800136X</pub-id>
          <pub-id pub-id-type="medline">29843830</pub-id>
          <pub-id pub-id-type="pii">S095026881800136X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Boulton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carlson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Montgomery</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Syndromic surveillance for influenza in Tianjin, China: 2013-14</article-title>
          <source>J Public Health (Oxf)</source>
          <year>2017</year>
          <month>06</month>
          <day>1</day>
          <volume>39</volume>
          <issue>2</issue>
          <fpage>274</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1093/pubmed/fdw022</pub-id>
          <pub-id pub-id-type="medline">26968483</pub-id>
          <pub-id pub-id-type="pii">fdw022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scuffham</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Estimating influenza-related hospital admissions in older people from GP consultation data</article-title>
          <source>Vaccine</source>
          <year>2004</year>
          <month>07</month>
          <day>29</day>
          <volume>22</volume>
          <issue>21-22</issue>
          <fpage>2853</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2003.12.022</pub-id>
          <pub-id pub-id-type="medline">15246621</pub-id>
          <pub-id pub-id-type="pii">S0264410X0400026X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lucero-Obusan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Winston</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Schirmer</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Oda</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Holodniy</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Enhanced influenza surveillance using telephone triage and electronic syndromic surveillance in the Department of Veterans Affairs, 2011-2015</article-title>
          <source>Public Health Rep</source>
          <year>2017</year>
          <volume>132</volume>
          <issue>1_suppl</issue>
          <fpage>16S</fpage>
          <lpage>22S</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28692402"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0033354917709779</pub-id>
          <pub-id pub-id-type="medline">28692402</pub-id>
          <pub-id pub-id-type="pmcid">PMC5676515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Widerström</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Omberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ferm</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Autoregressive Integrated Moving Average (ARIMA) Modeling of Time Series of Local Telephone Triage Data for Syndromic Surveillance</article-title>
          <source>Online J Public Health Inform</source>
          <year>2014</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>-</fpage>
          <pub-id pub-id-type="doi">10.5210/ojphi.v6i1.5049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aparicio</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Filho</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Disease Surveillance Using Transaction Data in New Zealand</article-title>
          <source>The Innovation Fund</source>
          <year>2018</year>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://innovationfund.co.nz/sites/default/files/2018-12/disease-surveillance-transaction_20180411_RELEASED.pdf">https://innovationfund.co.nz/sites/default/files/2018-12/disease-surveillance-transaction_20180411_RELEASED.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boulos</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Resch</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Crowley</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Breslin</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Burtner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pike</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Jezierski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chuang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Crowdsourcing, citizen sensing and sensor web technologies for public and environmental health surveillance and crisis management: trends, OGC standards and application examples</article-title>
          <source>Int J Health Geogr</source>
          <year>2011</year>
          <month>12</month>
          <day>21</day>
          <volume>10</volume>
          <fpage>67</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ij-healthgeographics.biomedcentral.com/articles/10.1186/1476-072X-10-67"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1476-072X-10-67</pub-id>
          <pub-id pub-id-type="medline">22188675</pub-id>
          <pub-id pub-id-type="pii">1476-072X-10-67</pub-id>
          <pub-id pub-id-type="pmcid">PMC3271966</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thapen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Simmie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hankin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gillard</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Defender: detecting and forecasting epidemics using novel data-analytics for enhanced response</article-title>
          <source>PLoS One</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>5</issue>
          <fpage>e0155417</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0155417"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0155417</pub-id>
          <pub-id pub-id-type="medline">27192059</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-27997</pub-id>
          <pub-id pub-id-type="pmcid">PMC4871418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cumming</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gribben</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of the Primary Health Care Strategy: Practice Data Analysis 2001-2005</article-title>
          <source>Health Services Research Centre Wellington</source>
          <year>2007</year>
          <access-date>2020-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hiirc.org.nz/page/26568/evaluation-of-the-primary-health-care-strategy/;jsessionid=8806E70F6450814D0231B7DC2B4B476D?tab=827&#38;contentType=419&#38;section=13414">https://www.hiirc.org.nz/page/26568/evaluation-of-the-primary-health-care-strategy/;jsessionid=8806E70F6450814D0231B7DC2B4B476D?tab=827&#38;contentType=419&#38;section=13414</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>St George</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Cullen</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>The Healthline pilot: call centre triage in New Zealand</article-title>
          <source>N Z Med J</source>
          <year>2001</year>
          <month>09</month>
          <day>28</day>
          <volume>114</volume>
          <issue>1140</issue>
          <fpage>429</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="medline">11700752</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>Trends</article-title>
          <source>Google Trends</source>
          <year>2019</year>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trends.google.com/trends/">https://trends.google.com/trends/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <article-title>Historical PowerTrack API</article-title>
          <source>Gnip</source>
          <year>2018</year>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://support.gnip.com/apis/historical_api2.0/">http://support.gnip.com/apis/historical_api2.0/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <article-title>Attendance Matters</article-title>
          <source>Ministry of Education, New Zealand</source>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.education.govt.nz/assets/Documents/School/Running-a-school/Managing-students/Managing-student-attendance/AttendanceMatters.pdf">http://www.education.govt.nz/assets/Documents/School/Running-a-school/Managing-students/Managing-student-attendance/AttendanceMatters.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adnan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peterkin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McLaughlin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>HL7 Middleware Framework for Laboratory Notifications for Notifiable Diseases</article-title>
          <source>Driving Reform: Digital Health is Everyone’s Business: Selected Papers from the 23rd Australian National Health Informatics Conference (HIC 2015)</source>
          <year>2015</year>
          <publisher-loc>Amsterdam, Netherlands</publisher-loc>
          <publisher-name>IOS Press</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cmapspublic2.ihmc.us/rid=1Q75QXPSK-1VCDC6J-X0/2015%20-%20Driving%20reform%20Digital%20health%20is%20everyone">https://cmapspublic2.ihmc.us/rid=1Q75QXPSK-1VCDC6J-X0/2015%20-%20Driving%20reform%20Digital%20health%20is%20everyone</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Delen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Advanced Data Mining Techniques</source>
          <year>2008</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer Science &#38; Business Media</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dailey</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Watkins</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Plant</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Timeliness of data sources used for influenza surveillance</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2007</year>
          <volume>14</volume>
          <issue>5</issue>
          <fpage>626</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17600101"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M2328</pub-id>
          <pub-id pub-id-type="medline">17600101</pub-id>
          <pub-id pub-id-type="pii">M2328</pub-id>
          <pub-id pub-id-type="pmcid">PMC1975801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Bahk</surname>
              <given-names>GJ</given-names>
            </name>
          </person-group>
          <article-title>Combined influence of multiple climatic factors on the incidence of bacterial foodborne diseases</article-title>
          <source>Sci Total Environ</source>
          <year>2018</year>
          <month>01</month>
          <day>1</day>
          <volume>610-611</volume>
          <fpage>10</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1016/j.scitotenv.2017.08.045</pub-id>
          <pub-id pub-id-type="medline">28802105</pub-id>
          <pub-id pub-id-type="pii">S0048-9697(17)32043-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Jalalpour</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gel</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Torcaso</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Igusa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rothman</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Influenza forecasting with Google flu trends</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e56176</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0056176"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0056176</pub-id>
          <pub-id pub-id-type="medline">23457520</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-29961</pub-id>
          <pub-id pub-id-type="pmcid">PMC3572967</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Combining search, social media, and traditional data sources to improve influenza surveillance</article-title>
          <source>PLoS Comput Biol</source>
          <year>2015</year>
          <month>10</month>
          <volume>11</volume>
          <issue>10</issue>
          <fpage>e1004513</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1004513"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1004513</pub-id>
          <pub-id pub-id-type="medline">26513245</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-15-00856</pub-id>
          <pub-id pub-id-type="pmcid">PMC4626021</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kou</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Accurate estimation of influenza epidemics using Google search data via ARGO</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2015</year>
          <month>11</month>
          <day>24</day>
          <volume>112</volume>
          <issue>47</issue>
          <fpage>14473</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=26553980"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1515373112</pub-id>
          <pub-id pub-id-type="medline">26553980</pub-id>
          <pub-id pub-id-type="pii">1515373112</pub-id>
          <pub-id pub-id-type="pmcid">PMC4664296</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>Create Space Time Cube</article-title>
          <source>ArcGIS</source>
          <year>2019</year>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://desktop.arcgis.com/en/arcmap/10.3/tools/space-time-pattern-mining-toolbox/create-space-time-cube.htm">http://desktop.arcgis.com/en/arcmap/10.3/tools/space-time-pattern-mining-toolbox/create-space-time-cube.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anselin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Local indicators of spatial association—LISA</article-title>
          <source>Geogr Anal</source>
          <year>1995</year>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>93</fpage>
          <lpage>115</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1538-4632.1995.tb00338.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cutter</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Finch</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Temporal and spatial changes in social vulnerability to natural hazards</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2008</year>
          <month>03</month>
          <day>19</day>
          <volume>105</volume>
          <issue>7</issue>
          <fpage>2301</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=18268336"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.0710375105</pub-id>
          <pub-id pub-id-type="medline">18268336</pub-id>
          <pub-id pub-id-type="pii">0710375105</pub-id>
          <pub-id pub-id-type="pmcid">PMC2268131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hardisty</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Klippel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Analysing spatio-temporal autocorrelation with LISTA-Viz</article-title>
          <source>Int J Geogr Inf Sci</source>
          <year>2010</year>
          <month>10</month>
          <day>11</day>
          <volume>24</volume>
          <issue>10</issue>
          <fpage>1515</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.1080/13658816.2010.511717</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Epidemiological analysis, detection, and comparison of space-time patterns of Beijing hand-foot-mouth disease (2008-2012)</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e92745</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0092745"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0092745</pub-id>
          <pub-id pub-id-type="medline">24663329</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-54211</pub-id>
          <pub-id pub-id-type="pmcid">PMC3963949</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amara</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ayadi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The local geographies of welfare in Tunisia: does neighbourhood matter?</article-title>
          <source>Int J Soc Welf</source>
          <year>2012</year>
          <month>01</month>
          <day>20</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>90</fpage>
          <lpage>103</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1468-2397.2011.00863.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <article-title>How Local Outlier Analysis Works</article-title>
          <source>ArcGIS Pro</source>
          <access-date>2020-08-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://pro.arcgis.com/en/pro-app/tool-reference/space-time-pattern-mining/learnmorelocaloutlier.htm">http://pro.arcgis.com/en/pro-app/tool-reference/space-time-pattern-mining/learnmorelocaloutlier.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Twitter improves influenza forecasting</article-title>
          <source>PLoS Curr</source>
          <year>2014</year>
          <month>10</month>
          <day>28</day>
          <volume>6</volume>
          <fpage>ecurrents.outbreaks.90b9ed0f59bae4ccaa683a39865d9117</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1371/currents.outbreaks.90b9ed0f59bae4ccaa683a39865d9117"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/currents.outbreaks.90b9ed0f59bae4ccaa683a39865d9117</pub-id>
          <pub-id pub-id-type="medline">25642377</pub-id>
          <pub-id pub-id-type="pmcid">PMC4234396</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mabuza</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Coetzee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Durrheim</surname>
              <given-names>DN</given-names>
            </name>
          </person-group>
          <article-title>Using the SaTScan method to detect local malaria clusters for guiding malaria control programmes</article-title>
          <source>Malar J</source>
          <year>2009</year>
          <month>04</month>
          <day>17</day>
          <volume>8</volume>
          <fpage>68</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://malariajournal.biomedcentral.com/articles/10.1186/1475-2875-8-68"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1475-2875-8-68</pub-id>
          <pub-id pub-id-type="medline">19374738</pub-id>
          <pub-id pub-id-type="pii">1475-2875-8-68</pub-id>
          <pub-id pub-id-type="pmcid">PMC2679049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Henly</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tuli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kluberg</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>QC</given-names>
            </name>
            <name name-style="western">
              <surname>Anema</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maharana</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
          </person-group>
          <article-title>Disparities in digital reporting of illness: a demographic and socioeconomic assessment</article-title>
          <source>Prev Med</source>
          <year>2017</year>
          <month>08</month>
          <volume>101</volume>
          <fpage>18</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0091-7435(17)30172-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2017.05.009</pub-id>
          <pub-id pub-id-type="medline">28528170</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(17)30172-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC5553633</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wakamiya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Twitter-based influenza detection after flu peak via tweets with indirect information: text mining study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2018</year>
          <month>09</month>
          <day>25</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>e65</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2018/3/e65/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.8627</pub-id>
          <pub-id pub-id-type="medline">30274968</pub-id>
          <pub-id pub-id-type="pii">v4i3e65</pub-id>
          <pub-id pub-id-type="pmcid">PMC6231889</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jorder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stern</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Using online reviews by restaurant patrons to identify unreported cases of foodborne illness</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2014</year>
          <volume>63</volume>
          <issue>20</issue>
          <fpage>441</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://web2.cs.columbia.edu/~gravano/Papers/2014/cdc2014.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Waldner</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Big Data for Infectious Diseases Surveillance and the Potential Contribution to the Investigation of Foodborne Disease in Canada</article-title>
          <source>National Collaborating Centre for Infectious Diseases</source>
          <year>2017</year>
          <access-date>2020-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nccid.ca/publications/big-data-for-infectious-diseases-surveillance/">https://nccid.ca/publications/big-data-for-infectious-diseases-surveillance/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Integrating multiple data sources and learning models to predict infectious diseases in China</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>680</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31259024"/>
          </comment>
          <pub-id pub-id-type="medline">31259024</pub-id>
          <pub-id pub-id-type="pmcid">PMC6568090</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chae</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Predicting infectious disease using deep learning and big data</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2018</year>
          <month>07</month>
          <day>27</day>
          <volume>15</volume>
          <issue>8</issue>
          <fpage>1596</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph15081596"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph15081596</pub-id>
          <pub-id pub-id-type="medline">30060525</pub-id>
          <pub-id pub-id-type="pii">ijerph15081596</pub-id>
          <pub-id pub-id-type="pmcid">PMC6121625</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
