<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i1e34982</article-id>
      <article-id pub-id-type="pmid">36719726</article-id>
      <article-id pub-id-type="doi">10.2196/34982</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Gastroenteritis Forecasting Assessing the Use of Web and Electronic Health Record Data With a Linear and a Nonlinear Approach: Comparison Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Bradley</surname>
            <given-names>Heather</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Staffini</surname>
            <given-names>Alessio</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cheong</surname>
            <given-names>Yoon Ling</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sükei</surname>
            <given-names>Emese</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Poirier</surname>
            <given-names>Canelle</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff02" ref-type="aff">2</xref>
          <address>
            <institution>Computational Health Informatics Program</institution>
            <institution>Boston Children's Hospital</institution>
            <addr-line>300 Longwood Avenue</addr-line>
            <addr-line>Boston, MA, 02115</addr-line>
            <country>United States</country>
            <phone>1 617 355 6000</phone>
            <email>canelle.poirier@outlook.fr</email>
          </address>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6972-2621</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Bouzillé</surname>
            <given-names>Guillaume</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3637-6558</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Bertaud</surname>
            <given-names>Valérie</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0293-5531</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Cuggia</surname>
            <given-names>Marc</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6943-3937</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Santillana</surname>
            <given-names>Mauricio</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff02" ref-type="aff">2</xref>
          <xref rid="aff06" ref-type="aff">6</xref>
          <xref rid="aff07" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4206-418X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lavenu</surname>
            <given-names>Audrey</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff08" ref-type="aff">8</xref>
          <xref rid="aff09" ref-type="aff">9</xref>
          <xref rid="aff10" ref-type="aff">10</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0049-2397</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff01">
        <label>1</label>
        <institution>Department of Pediatrics</institution>
        <institution>Harvard Medical School</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff02">
        <label>2</label>
        <institution>Computational Health Informatics Program</institution>
        <institution>Boston Children's Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff03">
        <label>3</label>
        <institution>Institut national de la santé et de la recherche médicale U1099</institution>
        <addr-line>Rennes</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff04">
        <label>4</label>
        <institution>Laboratoire Traitement du Signal et de l'Image</institution>
        <institution>Université de Rennes 1</institution>
        <addr-line>Rennes</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff05">
        <label>5</label>
        <institution>Centre de Données Cliniques</institution>
        <institution>Centre Hospitalier Universitaire Rennes</institution>
        <addr-line>Rennes</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff06">
        <label>6</label>
        <institution>Harvard Tseng-Hsi Chan School of Public Health</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff07">
        <label>7</label>
        <institution>Machine Intelligence Group for the Betterment of Health and the Environment</institution>
        <institution>Network Science Institute</institution>
        <institution>Northeastern University</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff08">
        <label>8</label>
        <institution>Faculté de médecine</institution>
        <institution>Université de Rennes 1</institution>
        <addr-line>Rennes</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff09">
        <label>9</label>
        <institution>Institut de Recherche Mathématique de Rennes</institution>
        <addr-line>Rennes</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff10">
        <label>10</label>
        <institution>Institut national de la santé et de la recherche médicale CIC 1414</institution>
        <institution>Université de Rennes 1</institution>
        <addr-line>Rennes</addr-line>
        <country>France</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Canelle Poirier <email>canelle.poirier@outlook.fr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>1</month>
        <year>2023</year>
      </pub-date>
      <volume>9</volume>
      <elocation-id>e34982</elocation-id>
      <history>
        <date date-type="received">
          <day>15</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>24</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>11</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Canelle Poirier, Guillaume Bouzillé, Valérie Bertaud, Marc Cuggia, Mauricio Santillana, Audrey Lavenu. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 31.01.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2023/1/e34982" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Disease surveillance systems capable of producing accurate real-time and short-term forecasts can help public health officials design timely public health interventions to mitigate the effects of disease outbreaks in affected populations. In France, existing clinic-based disease surveillance systems produce gastroenteritis activity information that lags real time by 1 to 3 weeks. This temporal data gap prevents public health officials from having a timely epidemiological characterization of this disease at any point in time and thus leads to the design of interventions that do not take into consideration the most recent changes in dynamics.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The goal of this study was to evaluate the feasibility of using internet search query trends and electronic health records to predict acute gastroenteritis (AG) incidence rates in near real time, at the national and regional scales, and for long-term forecasts (up to 10 weeks).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We present 2 different approaches (linear and nonlinear) that produce real-time estimates, short-term forecasts, and long-term forecasts of AG activity at 2 different spatial scales in France (national and regional). Both approaches leverage disparate data sources that include disease-related internet search activity, electronic health record data, and historical disease activity.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our results suggest that all data sources contribute to improving gastroenteritis surveillance for long-term forecasts with the prominent predictive power of historical data owing to the strong seasonal dynamics of this disease.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The methods we developed could help reduce the impact of the AG peak by making it possible to anticipate increased activity by up to 10 weeks.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>infectious disease</kwd>
        <kwd>acute gastroenteritis</kwd>
        <kwd>modeling</kwd>
        <kwd>modeling disease outbreaks</kwd>
        <kwd>machine learning</kwd>
        <kwd>public health</kwd>
        <kwd>machine learning in public health</kwd>
        <kwd>forecasting</kwd>
        <kwd>digital data</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Acute gastroenteritis (AG) is a major public health problem worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. Commonly defined as diarrhea or vomiting in the past 24 hours [<xref ref-type="bibr" rid="ref2">2</xref>], AG is one of the main causes of morbidity and mortality among young people and causes up to 2.5 million deaths per year in children aged &lt;5 years around the world [<xref ref-type="bibr" rid="ref3">3</xref>]. Although it is generally a mild disease, its morbidity and economic burden are high [<xref ref-type="bibr" rid="ref4">4</xref>]. In France, there are &gt;21 million episodes of AG each year [<xref ref-type="bibr" rid="ref5">5</xref>]. Although AG episodes occur throughout the year, there is a winter peak, mainly owing to norovirus and rotavirus [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. During these peaks, the increase of visits to general practitioners and emergency or pediatric departments causes health care system disruptions [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>Disease surveillance systems capable of producing accurate real-time and short-term forecasts can help public health officials design timely public health interventions to mitigate the effects of disease outbreaks in affected populations. In France, all acute diarrhea cases seen during medical appointments are reported weekly by volunteer outpatient health care providers. An estimation of AG incidence rate is then computed, at the national or regional scale, by considering the number of sentinel physicians and the medical density of the area of interest [<xref ref-type="bibr" rid="ref9">9</xref>]. However, data collection, processing, aggregation, and distribution processes introduce up to 3 weeks of delay in the availability of AG activity information. This temporal data gap prevents public health officials from having a timely perspective about AG activity and thus leads to the design of interventions that do not take into consideration the most recent changes in disease dynamics. Therefore, there is a growing interest in finding new ways to mitigate this information gap [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>To alleviate this time lag, several studies have proposed approaches to produce accurate and reliable real-time disease activity estimates, for example, to monitor influenza [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. For AG, studies have been focused on identifying the clinical characteristics of the disease. Norovirus and rotavirus are the viruses responsible for most gastroenteritis outbreaks [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. This disease has a strong wintertime seasonality, but this seasonality could be affected by climate change, which would affect norovirus transmission, the host’s susceptibility to norovirus infection, and resistance of norovirus to environmental conditions. This may cause large oscillations in the number of cases per year [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. AG remains a major cause of hospitalizations, especially for children, and the use of a vaccine could help to decrease the impact of the disease [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Some research teams have assessed the correlation between data sources (eg, drug reimbursement data and emergency department visits) and general practitioner visits for AG [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Other studies have shown a significant correlation between internet search query trends and AG incidence rates in different locations such as the United States, Mexico, the United Kingdom, and France [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. However, none, to the best of our knowledge [<xref ref-type="bibr" rid="ref22">22</xref>], have proposed a feasible methodology to forecast AG activity. Through this study, we investigated the challenges of achieving this and proposed a reliable forecasting approach.</p>
      </sec>
      <sec>
        <title>State of the Art</title>
        <p>Existing forecasting systems for other disease outbreaks, such as influenza, include statistical models that leverage information available in near real time [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. One of the first and most prominent studies is Google Flu Trends [<xref ref-type="bibr" rid="ref23">23</xref>], a web-based service operated by Google. Created in 2009, the platform used the volume of selected Google search terms to estimate influenza activity in real time. However, the web service was stopped following several prediction errors owing to changes in people’s search behavior as a result of the exceptional nature of the pandemic or owing to the announcement of a pandemic that ultimately did not materialize [<xref ref-type="bibr" rid="ref24">24</xref>]. Following this, some authors updated the Google Flu Trends algorithm to improve influenza forecasting, by including data from Google Correlate and Google Trends web services and other sources, for instance, historical influenza information [<xref ref-type="bibr" rid="ref11">11</xref>]. The internet is not the only data source that can be used to produce information in real time. With the widespread adoption of patient electronic health records (EHRs), hospitals also generate a huge amount of data. Bouzillé et al [<xref ref-type="bibr" rid="ref25">25</xref>] showed that EHRs are strongly correlated with influenza incidence rates. Some authors proposed statistical models using EHRs to predict influenza incidence rates in real time [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. In addition, other studies showed that internet users’ searches were strongly correlated with influenza epidemics and other diseases, including AG [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>In this study, we evaluated the feasibility of using internet search query trends and EHR data to predict AG incidence rates in near real time, at the national and regional scales, and for long-term forecasts (up to 10 weeks). We used 2 different methods—a linear approach using Elastic Net and a nonlinear approach using random forest (RF). In addition, as AG outbreaks cause disruptions in hospitals and emergency departments, we estimated AG incidence rates at the level of emergency departments and hospital stays.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Variables to Be Predicted</title>
        <sec>
          <title>National Level</title>
          <p>We obtained the national (Metropolitan France) acute diarrhea weekly incidence rates (per 100,000 inhabitants) from the French Sentinel network [<xref ref-type="bibr" rid="ref27">27</xref>], from January 2008 to March 2018. We retrieved these data in April 2018.</p>
        </sec>
        <sec>
          <title>Regional Level</title>
          <p>We obtained the regional (Brittany region) acute diarrhea incidence rates (per 100,000 inhabitants) from the French Sentinel network [<xref ref-type="bibr" rid="ref27">27</xref>], from January 2008 to March 2018. We chose the Brittany region as we used EHR data from a hospital in Brittany. We retrieved these data in April 2018.</p>
        </sec>
      </sec>
      <sec>
        <title>Predictive Variables</title>
        <sec>
          <title>Web Data</title>
          <p>We obtained the frequency per week of the 100 most correlated French queries from Google Correlate [<xref ref-type="bibr" rid="ref28">28</xref>]. For each signal to be predicted (national and regional levels), we retrieved Google Correlate data for the period from January 2008 to March 2018. As our prediction period is from May 2014 to February 2018, the correlation was calculated from January 2008 to April 2014. All signals were normalized to obtain mean 0 and SD 1 before calculating the correlation. The purpose of this correlation step was to select the most appropriate queries to predict the outbreak without prior knowledge [<xref ref-type="bibr" rid="ref29">29</xref>]. The most correlated queries obtained for national and regional levels can differ because the weekly incidence rates for France and Brittany are different.</p>
        </sec>
        <sec>
          <title>Clinical Data</title>
          <p>We used data from the clinical data warehouse (CDW) of Rennes University Hospital (France), called entrepôt de données de l’HÔPital (eHOP). This CDW includes structured (laboratory test results, prescriptions, and International Statistical Classification of Diseases and Related Health Problems 10th Revision diagnoses) and unstructured (discharge letters, pathology reports, and operative reports) patients’ data from 1.2 million inpatients and outpatients and 45 million documents. To identify patients with specific criteria, eHOP has its own search engine system that allows querying unstructured data with keywords or structured data with codes based on terminologies.</p>
          <p>First, to retrieve clinical data connected with AG, we performed different full-text queries (related to gastroenteritis, its symptoms, virus, or treatments). These queries allowed us to obtain all documents matching the search criteria (often, several documents for 1 patient and 1 stay). Then, for each week, we kept the oldest document for 1 patient and 1 hospital stay, and we calculated the number of hospital stays with at least one document mentioning the keyword contained in the query. As we used 19 keywords, we obtained 19 variables from CDW eHOP.</p>
          <p>Then, we built a database containing the time series constructed from the structured data (total n=1,335,347 time series). As with the Google Correlate data, we calculated the Pearson correlation between both national and regional incidence rates and the time series from the database. We retrieved the 100 most correlated signals. As our prediction period is from May 2014 to February 2018, we calculated the correlation between January 2008 and April 2014.</p>
          <p>Overall, we obtained 119 variables (n=19, 16% from the full-text queries and n=100, 84% from the most correlated variables in the structured data). The 100 most correlated variables can be different for national and regional levels. We retrieved EHR data for the period from January 2008 to March 2018 in April 2018. All these data could be extracted in real time if needed.</p>
        </sec>
        <sec>
          <title>Historical Data</title>
          <p>We used the incidence rates for the previous 52 weeks as predictive variables, for both national and regional levels.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study was approved by the local ethics committee of the Rennes Academic Hospital (approval number 16.69).</p>
      </sec>
      <sec>
        <title>Statistical Models</title>
        <sec>
          <title>Linear Approach</title>
          <p>To minimize the negative effects of using a large number of input variables, potentially including redundant information, we used Elastic Net, a regularized multivariate regression methodology that can identify parsimonious models [<xref ref-type="bibr" rid="ref30">30</xref>]. Elastic Net combines the power of Lasso and Ridge regressions, allowing variable selection among variables that are highly correlated [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. We performed the Elastic Net regression analysis using the <italic>caret</italic> package in R (R Foundation for Statistical Computing) and the associated function fit with the <italic>glmnet</italic> method [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. We fixed a coefficient λ=0.5 to give the same importance to Ridge and Lasso methods.</p>
          <p>The formulation of our model is the following:</p>
          <disp-formula>
            <graphic xlink:href="publichealth_v9i1e34982_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, y<sub>T</sub> denotes AG incidence rate at time T=t, t+1, t+2, t+3 (for the different levels of prediction), <inline-graphic xlink:href="publichealth_v9i1e34982_fig11.png" xlink:type="simple" mimetype="image"/> denotes historical variables, <inline-graphic xlink:href="publichealth_v9i1e34982_fig12.png" xlink:type="simple" mimetype="image"/> denotes Google data, <inline-graphic xlink:href="publichealth_v9i1e34982_fig13.png" xlink:type="simple" mimetype="image"/> denotes EHR data, and <inline-graphic xlink:href="publichealth_v9i1e34982_fig14.png" xlink:type="simple" mimetype="image"/> denotes residuals.</p>
          <p>For a given week, we needed to find the parameters, α=(α<sub>1</sub>,...,α<sub>52</sub>), β=(β<sub>1</sub>,...,β<sub>100</sub>), and γ=(γ<sub>1</sub>,...,γ<sub>119</sub>), that minimize the following:</p>
          <disp-formula>
            <graphic xlink:href="publichealth_v9i1e34982_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>Here, <inline-graphic xlink:href="publichealth_v9i1e34982_fig16.png" xlink:type="simple" mimetype="image"/> are hyperparameters of the Elastic Net regression. We used 10-block cross-validation to optimize the parameters. All parameters (α=[α<sub>1</sub>,...,α<sub>52</sub>], β=[β<sub>1</sub>,...,β<sub>100</sub>], and γ=[γ<sub>1</sub>,...,γ<sub>119</sub>]) were dynamically trained every week with an expanding window using all data available. In this way, the size of our training data set increased every week. For example, to predict the first week of January 2015, our training data set ranged from January 2008 to the last week of December 2014. To predict the first week of January 2016, our training data set ranged from January 2008 to the last week of December 2015. We obtained estimates from May 2014 to February 2018.</p>
        </sec>
        <sec>
          <title>Nonlinear Approach</title>
          <p>RF is a nonlinear machine learning approach based on the construction of multiple decision trees using the general bootstrap aggregating technique (known as bagging) [<xref ref-type="bibr" rid="ref35">35</xref>]. We used this method as it showed good performance in short-term forecasting even when it is compared with other machine learning approaches such as support vector machine or neural network or a traditional approach such as autoregressive integrated moving average [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p>
          <p>With RF, the AG incidence rates are obtained with the following: <inline-graphic xlink:href="publichealth_v9i1e34982_fig17.png" xlink:type="simple" mimetype="image"/></p>
          <p>Here, y<sub>T</sub> denotes AG incidence rate at time T=t, t+1, t+2, t+3 (for the different levels of prediction) and <inline-graphic xlink:href="publichealth_v9i1e34982_fig18.png" xlink:type="simple" mimetype="image"/> denotes the AG incidence rate estimate obtained with the decision tree b. We used the R package, <italic>randomForest</italic> [<xref ref-type="bibr" rid="ref38">38</xref>], to create our RF models. The hyperparameters corresponding to the number of decision trees and the number of variables randomly sampled at each split were optimized on a training data set from January 2008 to May 2014. Then, as with the Elastic Net model, RF was dynamically recalibrated for every new week of prediction by incorporating all the data available. We obtained estimates from May 2014 to February 2018.</p>
        </sec>
        <sec>
          <title>Contribution of Each Data Source</title>
          <p>In addition, to assess the contribution of each individual data source and of their combinations, we built Elastic Net and RF models using the following predictive variables:</p>
          <list list-type="order">
            <list-item>
              <p>AG incidence rates for the previous 52 weeks (baseline model, referred to as the autoregressive model of order 52 [AR(52)] in the following sections)</p>
            </list-item>
            <list-item>
              <p>Google data</p>
            </list-item>
            <list-item>
              <p>EHR data</p>
            </list-item>
            <list-item>
              <p>Google data and AR(52)</p>
            </list-item>
            <list-item>
              <p>EHR data and AR(52)</p>
            </list-item>
            <list-item>
              <p>Google data and EHR data</p>
            </list-item>
          </list>
        </sec>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>To assess the performance of our models, we compared our estimates with the real incidence rates from the Sentinel network. We calculated the root mean squared error and the Pearson correlation coefficient for our test period starting from May 2014 to February 2018. The model yielding the most accurate estimates is the one with the highest correlation and the lowest error:</p>
        <list list-type="order">
          <list-item>
            <p>
              <disp-formula>
                <graphic xlink:href="publichealth_v9i1e34982_fig19.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </disp-formula>
            </p>
          </list-item>
          <list-item>
            <p>
              <disp-formula>
                <graphic xlink:href="publichealth_v9i1e34982_fig20.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </disp-formula>
            </p>
          </list-item>
        </list>
        <p>Here, <inline-graphic xlink:href="publichealth_v9i1e34982_fig21.png" xlink:type="simple" mimetype="image"/> is the predicted value for the week t, <inline-graphic xlink:href="publichealth_v9i1e34982_fig22.png" xlink:type="simple" mimetype="image"/> is the mean of predicted values, y<sub>t</sub> is the real value for the week t, and <inline-graphic xlink:href="publichealth_v9i1e34982_fig23.png" xlink:type="simple" mimetype="image"/> is the mean of real values.</p>
      </sec>
      <sec>
        <title>Comparison With Influenza</title>
        <p>As we used a method developed for influenza outbreaks, we compared the results obtained for AG with those obtained for influenza. The aim was to determine whether external data sources are as relevant for AG as for influenza. We started by comparing the stationarity and the seasonality of both time series by calculating the following:</p>
        <p>1. The autocorrelation function (ACF), which measures the autocorrelation between y<sub>t</sub> and y<sub>t–h</sub>:</p>
        <disp-formula>
          <graphic xlink:href="publichealth_v9i1e34982_fig24.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where γ(h)=cov(y<sub>t</sub>, y<sub>t–h</sub>)</p>
        <p>2. The partial ACF (PACF), which measures the autocorrelation between y<sub>t</sub> and y<sub>t–h</sub> after removing the autocorrelation between the intermediate variables y<sub>t–1</sub>,...,y<sub>t–h+1</sub>:</p>
        <p>r(h)=corr(y<sub>t</sub>,y<sub>t–h</sub>|y<sub>t–1</sub>,...,y<sub>t–h+1</sub>)</p>
        <p>Then, we compared the accuracy of estimates for forecasts up to 10 weeks with Elastic Net and RF models using only historical data or combining Google, EHR, and historical data.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>First, we studied the impact of each data source for short-term forecasts with the 2 different approaches already used to predict influenza outbreaks—a linear approach with the Elastic Net model and a nonlinear approach with an RF model.</p>
        <p>Then, we analyzed the AG and influenza time series, especially the seasonality, to better understand the differences between the 2 diseases.</p>
        <p>Finally, we compared AG and influenza results obtained for long-term forecasts with the 2 approaches, and we assessed the impact of external data sources to increase the accuracy of our estimates.</p>
      </sec>
      <sec>
        <title>Linear Approach</title>
        <sec>
          <title>Overview</title>
          <p>At the national and regional levels, in terms of error, the lowest values are obtained with models using historical data and external data sources (<xref ref-type="table" rid="table1">Table 1</xref>). At the national level, in terms of error, both data sources, Google and EHR, produce the most accurate estimates compared with the model using only historical data—AR(52). At the regional level, the model using only historical data and EHR data yields lower errors than the model using historical data and both Google and EHR data.</p>
          <p>In terms of correlation, in most cases, at the national and regional levels, the model using only historical data yields the highest values.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>PCC<sup>a</sup> and RMSE<sup>b</sup> values obtained for the entire prediction period (May 2014 to March 2018) at the national and regional levels, with all the combinations of data sources.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="260"/>
              <col width="100"/>
              <col width="100"/>
              <col width="70"/>
              <col width="100"/>
              <col width="70"/>
              <col width="100"/>
              <col width="70"/>
              <col width="100"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Levels and data sources</td>
                  <td colspan="2">Real time</td>
                  <td colspan="2">1-week forecast</td>
                  <td colspan="2">2-week forecast</td>
                  <td colspan="2">3-week forecast</td>
                </tr>
                <tr valign="top">
                  <td colspan="2">
                    <break/>
                  </td>
                  <td>PCC</td>
                  <td>RMSE</td>
                  <td>PCC</td>
                  <td>RMSE</td>
                  <td>PCC</td>
                  <td>RMSE</td>
                  <td>PCC</td>
                  <td>RMSE</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="10">
                    <bold>National</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52)<sup>c</sup></td>
                  <td>
                    <italic>0.946</italic>
                    <sup>d</sup>
                  </td>
                  <td>
                    <italic>16.16</italic>
                  </td>
                  <td>
                    <italic>0.910</italic>
                  </td>
                  <td>22.69</td>
                  <td>
                    <italic>0.898</italic>
                  </td>
                  <td>26.95</td>
                  <td>
                    <italic>0.884</italic>
                  </td>
                  <td>30.69</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google</td>
                  <td>0.830</td>
                  <td>42.75</td>
                  <td>0.803</td>
                  <td>44.99</td>
                  <td>0.801</td>
                  <td>41.27</td>
                  <td>0.770</td>
                  <td>38.96</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>EHR<sup>e</sup></td>
                  <td>0.477</td>
                  <td>48.35</td>
                  <td>0.512</td>
                  <td>45.59</td>
                  <td>0.489</td>
                  <td>47.37</td>
                  <td>0.519</td>
                  <td>44.65</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and Google</td>
                  <td>
                    <italic>0.941</italic>
                  </td>
                  <td>18.10</td>
                  <td>0.896</td>
                  <td>24.17</td>
                  <td>0.871</td>
                  <td>26.98</td>
                  <td>0.847</td>
                  <td>28.24</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and EHR</td>
                  <td>0.932</td>
                  <td>
                    <italic>16.41</italic>
                  </td>
                  <td>0.880</td>
                  <td>21.58</td>
                  <td>0.820</td>
                  <td>26.15</td>
                  <td>0.823</td>
                  <td>
                    <italic>25.93</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google and EHR</td>
                  <td>0.836</td>
                  <td>36.09</td>
                  <td>0.846</td>
                  <td>34.48</td>
                  <td>0.779</td>
                  <td>34.23</td>
                  <td>0.795</td>
                  <td>32.32</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52), Google, and EHR</td>
                  <td>0.936</td>
                  <td>21.26</td>
                  <td>0.903</td>
                  <td>
                    <italic>20.94</italic>
                  </td>
                  <td>0.856</td>
                  <td>
                    <italic>24.16</italic>
                  </td>
                  <td>0.845</td>
                  <td>
                    <italic>25.33</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td colspan="10">
                    <bold>Regional</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52)</td>
                  <td>0.725</td>
                  <td>
                    <italic>40.75</italic>
                  </td>
                  <td>
                    <italic>0.705</italic>
                  </td>
                  <td>44.18</td>
                  <td>
                    <italic>0.670</italic>
                  </td>
                  <td>47.65</td>
                  <td>
                    <italic>0.681</italic>
                  </td>
                  <td>49.12</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google</td>
                  <td>0.652</td>
                  <td>65.84</td>
                  <td>0.603</td>
                  <td>64.79</td>
                  <td>0.594</td>
                  <td>60.33</td>
                  <td>0.596</td>
                  <td>61.67</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>EHR</td>
                  <td>0.462</td>
                  <td>59.83</td>
                  <td>0.538</td>
                  <td>55.62</td>
                  <td>0.546</td>
                  <td>55.87</td>
                  <td>0.582</td>
                  <td>52.90</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and Google</td>
                  <td>
                    <italic>0.738</italic>
                  </td>
                  <td>42.07</td>
                  <td>0.665</td>
                  <td>46.44</td>
                  <td>0.616</td>
                  <td>47.82</td>
                  <td>0.619</td>
                  <td>47.74</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and EHR</td>
                  <td>0.697</td>
                  <td>
                    <italic>40.99</italic>
                  </td>
                  <td>0.685</td>
                  <td>
                    <italic>42.38</italic>
                  </td>
                  <td>0.637</td>
                  <td>
                    <italic>46.48</italic>
                  </td>
                  <td>0.634</td>
                  <td>
                    <italic>46.31</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google and EHR</td>
                  <td>0.608</td>
                  <td>60.70</td>
                  <td>0.610</td>
                  <td>60.97</td>
                  <td>0.615</td>
                  <td>57.50</td>
                  <td>0.628</td>
                  <td>59.72</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52), Google, and EHR</td>
                  <td>0.724</td>
                  <td>42.12</td>
                  <td>0.689</td>
                  <td>45.24</td>
                  <td>0.646</td>
                  <td>47.37</td>
                  <td>0.620</td>
                  <td>52.19</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>PCC: Pearson correlation coefficient.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>RMSE: root mean squared error.</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>AR(52): autoregressive model of order 52.</p>
              </fn>
              <fn id="table1fn4">
                <p><sup>d</sup>Italicization highlights the 2 highest correlations and lowest errors obtained with the models for real time and 1-week, 2-week, and 3-week forecasts.</p>
              </fn>
              <fn id="table1fn5">
                <p><sup>e</sup>EHR: electronic health record.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>National Analysis</title>
          <p>For real-time estimates, the error values range from 48.4 to 16.2 and the correlation values range from 0.48 to 0.95, with the lowest error and the highest correlation obtained with the model using only historical data—AR(52). For 1-week estimates, the error values range from 45.6 to 20.9, with the lowest error obtained with the model using historical data and both external data sources, Google and EHR. The correlation values range from 0.51 to 0.91, with the highest value obtained with the model using only historical data. For 2-week and 3-week estimates, we have similar results, with error values ranging from 47.4 to 24.2 and 44.6 to 25.3, respectively, and the lowest errors obtained with the model using historical data and both external data sources, Google and EHR. In terms of correlation, the values range from 0.49 to 0.90 and from 0.52 to 0.88, respectively, with the highest correlation obtained with the AR(52) model.</p>
          <p><xref rid="figure1" ref-type="fig">Figure 1</xref> illustrates the estimates obtained at the national level for forecasts up to 3 weeks with the model using only historical data and the model using historical data and both data sources, Google and EHR. For real-time estimates, the results obtained with the 2 models are comparable, but for long-term forecasts (1, 2, and 3 weeks), the estimates obtained with the AR(52) model are delayed. In addition, the model using only historical data tends to smooth estimates and overestimate between peaks.</p>
          <p><xref rid="figure2" ref-type="fig">Figure 2</xref> is a visualization of the values of the coefficients for the model using historical data and both data sources, Google and EHR. For real-time estimates, the heat map shows that the model uses multiple variables from all data sources, such as historical data, Google data, and EHR data. Similar plots are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for long-term estimates.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>National level. Predictions up to 3 weeks obtained at the national level with the model using only historical data and the model using historical data and both data sources, Google and EHR. Gold standard, French Sentinel network data. EHR: electronic health record.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>National level. Heatmap of the coefficients. Each line of the heatmap corresponds to one predictive variable used in the model and each point of the line corresponds to 1 week predicted. The first block of variables corresponds to electronic health record (EHR) data, the second one corresponds to Google data, and the third one to historical data. In blue, a negative coefficient is associated with the variable, whereas in red, it is a positive coefficient. The white color means that the predictive variable is not selected by the model and does not participate in forecasting the corresponding week. Variables highlighted in yellow are those kept by the model almost all the time. For EHR data, these correspond to the predictive variables for the keywords “Autres deficits immunitaires,” “Autre virus grippal identifié,” “Streptococcus pneumoniae,” “Pneumopathie,” “Virus respiratoire syncytial.” For Google data, these are the keywords: “enero,” “enterite,” “epidemie de gastro,” “gastro entérite,” “ski massif central.” For historical data, these correspond to the previous week as well as week 17, week 18, week 25, and week 48 before the one we want to predict.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Regional Analysis</title>
          <p>For real-time estimates, the error values range from 65.8 to 40.8 and the correlation values range from 0.46 to 0.74, with the lowest value for the error obtained with the model using only historical data and the highest value for the correlation obtained with the model using historical data and Google data. For 1-week, 2-week, and 3-week estimates, the error values range from 64.8 to 42.4, from 60.3 to 46.5, and from 61.7 to 46.3, respectively. The lowest error values for long-term forecasts are all obtained with the model using historical data and EHR data. In terms of 1-week, 2-week, and 3-week correlation, the values range from 0.54 to 0.71, from 0.55 to 0.67, and from 0.58 to 0.68, respectively. The highest correlations for long-term forecasts are all obtained with the model using only historical data—AR(52).</p>
          <p><xref rid="figure3" ref-type="fig">Figure 3</xref> illustrates the estimates obtained at the regional level for forecasts up to 3 weeks with the model using only historical data and the model using historical data and both data sources, Google and EHR. As at the national level, for real-time estimates, the results obtained with the 2 models are comparable, but for long-term forecasts, the estimates obtained with the AR(52) model are delayed and tend to be smoothed and overestimated between peaks.</p>
          <p>The heat map (<xref rid="figure4" ref-type="fig">Figure 4</xref>) shows that for real-time estimates at the regional level, the model uses multiple variables from historical data (approximately 11 variables) and a lower number of variables from Google data (approximately 10 variables) and EHR data (approximately 9 variables) than at the national level. Similar plots are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for long-term estimates.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Regional level. Predictions up to 3 weeks obtained at the regional level with the model using only historical data and the model using historical data and both data sources, Google and EHR. Gold standard, French Sentinel network data. EHR: electronic health record.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Regional level. Heatmap of the coefficients. Each line of the heatmap corresponds to one predictive variable used in the model and each point of the line corresponds to 1 week predicted. The first block of variables corresponds to electronic health record (EHR) data, the second one corresponds to Google data, and the third one to historical data. In blue, a negative coefficient is associated with the variable, whereas in red, it is a positive coefficient. The white color means that the predictive variable is not selected by the model and does not participate in forecasting the corresponding week. Variables highlighted in yellow are those kept by the model almost all the time. For EHR data, these correspond to the predictive variables for the keywords “Par voie sous cutannée,” “Autre virus grippal identifié,” “Voies respiratoires. Virus non identifié,” “Pneumopathie,” “Bronchiolite aigüe,” “Virus respiratoire syncytial,” “Bronchite,” “Ventre.” For Google data, these are the keywords: “enero,” “gastro enterite,” “gastro entérite,” “fixations.” For historical data, these correspond to the two previous weeks as well as week 10, week 15, week 17, week 20, week 25, and week 48 before the one we want to predict.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Nonlinear Approach</title>
        <sec>
          <title>Overview</title>
          <p>For the nonlinear approach, at the national level, in terms of error and correlation, results are comparable between the model using only historical data—AR(52)—and the models combining historical data and external data sources (<xref ref-type="table" rid="table2">Table 2</xref>). At the regional level, in terms of error, the lowest errors are mostly obtained with the model including historical and EHR data. In terms of correlation, the highest values are mostly obtained with the model combining historical data and both data sources, Google and EHR. For the nonlinear approach, the values for correlation are higher and the values for errors are lower than the values obtained with the linear approach.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>PCC<sup>a</sup> and RMSE<sup>b</sup> values obtained for the entire prediction period (May 2014 to March 2018) for all levels and models.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="260"/>
              <col width="100"/>
              <col width="90"/>
              <col width="70"/>
              <col width="100"/>
              <col width="0"/>
              <col width="70"/>
              <col width="100"/>
              <col width="0"/>
              <col width="70"/>
              <col width="110"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Levels and data sources</td>
                  <td colspan="2">Real time</td>
                  <td colspan="3">1-week forecast</td>
                  <td colspan="3">2-week forecast</td>
                  <td colspan="2">3-week forecast</td>
                </tr>
                <tr valign="top">
                  <td colspan="2">
                    <break/>
                  </td>
                  <td>PCC</td>
                  <td>RMSE</td>
                  <td>PCC</td>
                  <td>RMSE</td>
                  <td colspan="2">PCC</td>
                  <td>RMSE</td>
                  <td colspan="2">PCC</td>
                  <td>RMSE</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="12">
                    <bold>National</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52)<sup>c</sup></td>
                  <td>
                    <italic>0.942</italic>
                    <sup>d</sup>
                  </td>
                  <td>
                    <italic>15.47</italic>
                  </td>
                  <td>
                    <italic>0.913</italic>
                  </td>
                  <td>
                    <italic>19.71</italic>
                  </td>
                  <td colspan="2">
                    <italic>0.892</italic>
                  </td>
                  <td>
                    <italic>22.19</italic>
                  </td>
                  <td colspan="2">
                    <italic>0.903</italic>
                  </td>
                  <td>
                    <italic>22.30</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google</td>
                  <td>0.884</td>
                  <td>45.59</td>
                  <td>0.876</td>
                  <td>45.72</td>
                  <td colspan="2">0.858</td>
                  <td>42.63</td>
                  <td colspan="2">0.830</td>
                  <td>40.52</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>EHR<sup>e</sup></td>
                  <td>0.795</td>
                  <td>32.93</td>
                  <td>0.615</td>
                  <td>50.68</td>
                  <td colspan="2">0.739</td>
                  <td>37.84</td>
                  <td colspan="2">0.692</td>
                  <td>41.30</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and Google</td>
                  <td>
                    <italic>0.946</italic>
                  </td>
                  <td>
                    <italic>15.87</italic>
                  </td>
                  <td>
                    <italic>0.913</italic>
                  </td>
                  <td>21.68</td>
                  <td colspan="2">
                    <italic>0.892</italic>
                  </td>
                  <td>23.63</td>
                  <td colspan="2">
                    <italic>0.909</italic>
                  </td>
                  <td>
                    <italic>22.98</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and EHR</td>
                  <td>0.938</td>
                  <td>
                    <italic>15.93</italic>
                  </td>
                  <td>0.906</td>
                  <td>
                    <italic>20.21</italic>
                  </td>
                  <td colspan="2">0.887</td>
                  <td>
                    <italic>22.85</italic>
                  </td>
                  <td colspan="2">0.890</td>
                  <td>23.31</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google and EHR</td>
                  <td>0.833</td>
                  <td>43.26</td>
                  <td>0.780</td>
                  <td>49.50</td>
                  <td colspan="2">0.849</td>
                  <td>37.70</td>
                  <td colspan="2">0.790</td>
                  <td>41.88</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52), Google, and EHR</td>
                  <td>
                    <italic>0.946</italic>
                  </td>
                  <td>
                    <italic>15.72</italic>
                  </td>
                  <td>0.909</td>
                  <td>21.76</td>
                  <td colspan="2">
                    <italic>0.895</italic>
                  </td>
                  <td>23.87</td>
                  <td colspan="2">0.886</td>
                  <td>24.11</td>
                </tr>
                <tr valign="top">
                  <td colspan="12">
                    <bold>Regional</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52)</td>
                  <td>0.745</td>
                  <td>
                    <italic>38.47</italic>
                  </td>
                  <td>0.699</td>
                  <td>42.68</td>
                  <td colspan="2">0.685</td>
                  <td>
                    <italic>44.11</italic>
                  </td>
                  <td colspan="2">0.677</td>
                  <td>45.05</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google</td>
                  <td>0.708</td>
                  <td>62.90</td>
                  <td>0.658</td>
                  <td>61.58</td>
                  <td colspan="2">0.671</td>
                  <td>57.02</td>
                  <td colspan="2">0.689</td>
                  <td>54.55</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>EHR</td>
                  <td>0.651</td>
                  <td>47.76</td>
                  <td>0.531</td>
                  <td>66.99</td>
                  <td colspan="2">0.562</td>
                  <td>60.51</td>
                  <td colspan="2">0.526</td>
                  <td>63.26</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and Google</td>
                  <td>0.757</td>
                  <td>39.71</td>
                  <td>0.700</td>
                  <td>46.91</td>
                  <td colspan="2">0.694</td>
                  <td>47.38</td>
                  <td colspan="2">
                    <italic>0.703</italic>
                  </td>
                  <td>47.87</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52) and EHR</td>
                  <td>0.743</td>
                  <td>
                    <italic>38.37</italic>
                  </td>
                  <td>
                    <italic>0.720</italic>
                  </td>
                  <td>
                    <italic>41.05</italic>
                  </td>
                  <td colspan="2">0.694</td>
                  <td>
                    <italic>43.83</italic>
                  </td>
                  <td colspan="2">0.694</td>
                  <td>
                    <italic>44.09</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Google and EHR</td>
                  <td>0.542</td>
                  <td>76.87</td>
                  <td>0.584</td>
                  <td>69.17</td>
                  <td colspan="2">0.663</td>
                  <td>55.48</td>
                  <td colspan="2">0.658</td>
                  <td>56.25</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>AR(52), Google, and EHR</td>
                  <td>
                    <italic>0.759</italic>
                  </td>
                  <td>
                    <italic>38.88</italic>
                  </td>
                  <td>
                    <italic>0.718</italic>
                  </td>
                  <td>44.63</td>
                  <td colspan="2">
                    <italic>0.702</italic>
                  </td>
                  <td>46.25</td>
                  <td colspan="2">
                    <italic>0.701</italic>
                  </td>
                  <td>47.17</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>PCC: Pearson correlation coefficient.</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>RMSE: root mean squared error.</p>
              </fn>
              <fn id="table2fn3">
                <p><sup>c</sup>AR(52): autoregressive model of order 52.</p>
              </fn>
              <fn id="table2fn4">
                <p><sup>d</sup>Italicization highlights the 2 highest correlations and lowest errors obtained with the models for real time and 1-week, 2-week, and 3-week forecasts.</p>
              </fn>
              <fn id="table2fn5">
                <p><sup>e</sup>EHR: electronic health record.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>National Analysis</title>
          <p>For real-time estimates, the error values range from 45.6 to 15.5 and the correlation values range from 0.80 to 0.95, with the lowest error and the highest correlation obtained with the model using only historical data—AR(52)—or the models combining historical data and external data sources. The results are similar for long-term forecasts, with error values ranging from 50.7 to 19.7 and correlation values ranging from 0.62 to 0.91 for 1-week estimates. For 2-week and 3-week estimates, the error values range from 42.6 to 22.8 and 41.9 to 22.3, respectively. In terms of 2-week and 3-week correlation, the values range from 0.74 to 0.90 and from 0.69 to 0.91, respectively.</p>
          <p><xref rid="figure5" ref-type="fig">Figure 5</xref> illustrates the estimates obtained at the national level for forecasts up to 3 weeks with the model using only historical data and the model using historical data and both data sources, Google and EHR. For real-time estimates and long-term forecasts, the results obtained with the 2 models are comparable. In comparison with the linear approach, the nonlinear approach tends to smooth estimates.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>National level. Predictions up to 3 weeks obtained at the national level with the model using only historical data and the model using historical data and both data sources, Google and EHR. Gold standard, French Sentinel network data. EHR: electronic health record.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Regional Analysis</title>
          <p>For real-time estimates, the error values range from 76.9 to 38.4 and the correlation values range from 0.54 to 0.76, with the lowest error and the highest correlation values obtained with the AR(52) model and the models combining historical data and external data sources. For 1-week, 2-week, and 3-week estimates, the error values range from 69.2 to 41.1, from 60.5 to 43.8, and from 63.3 to 44.1, respectively. The lowest error values for long-term forecasts are all obtained with the model using historical and EHR data. In terms of 1-week, 2-week, and 3-week correlation, the values range from 0.53 to 0.72, from 0.56 to 0.70, and from 0.53 to 0.70, respectively. The highest correlations for long-term forecasts are all obtained with the model using historical data and both data sources, Google and EHR.</p>
          <p><xref rid="figure6" ref-type="fig">Figure 6</xref> illustrates the estimates obtained at the regional level for forecasts up to 3 weeks with the model using only historical data and the model using historical data and both data sources, Google and EHR. As at the national level, results are comparable between the 2 models, and the nonlinear approach tends to smooth the estimates.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Regional level. Predictions up to 3 weeks obtained at the regional level with the model using only historical data and the model using historical data and both data sources, Google and EHR. Gold standard, French Sentinel network data. EHR: electronic health record.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Comparison of AG and Influenza</title>
        <p>To assess the role of external data sources in AG forecasting in comparison with influenza forecasting, we studied both time series, at the national and regional levels. As both series were stationary, we compared the seasonality. <xref rid="figure7" ref-type="fig">Figure 7</xref> corresponds to ACF and PACF obtained for AG and influenza.</p>
        <p>The ACF plot provides the correlation coefficients between a time series and its lagged values. The PACF plot provides the correlation coefficients between a time series and its lagged values after removing the effects that are already explained by the previous lags.</p>
        <p>The ACF plots at the national and regional levels (<xref rid="figure7" ref-type="fig">Figures 7</xref>A and 7C) show that both time series, AG and influenza, are seasonal, but with autocorrelation more important for AG than for influenza. This result can explain why historical data are able to provide more information for AG than for influenza. We have similar results for PACF plots (<xref rid="figure7" ref-type="fig">Figures 7</xref>B and 7D), at the national and regional levels, where the coefficients of partial autocorrelation are larger for AG than for influenza.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>ACF and PACF. Autocorrelation obtained for flu and AG at the national level (panels A and B) and regional level (panels C and D). ACF: autocorrelation function; AG: acute gastroenteritis; PACF: partial autocorrelation function.</p>
          </caption>
          <graphic xlink:href="publichealth_v9i1e34982_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Analysis of Forecast up to 10 Weeks</title>
        <sec>
          <title>Linear Approach</title>
          <p><xref rid="figure8" ref-type="fig">Figure 8</xref> and Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> show, for the linear approach, errors and correlation for AG at the national and regional levels, for forecasts up to 10 weeks. At the national level, the lowest error for real-time estimates is obtained with the linear approach using only historical data—AR(52). For long-term forecasts, from up to 1 week to up to 10 weeks, the lowest errors are obtained by using historical data and both data sources, Google and EHR. In terms of correlation, in all cases, the highest values are obtained by using only historical data. At the regional level, in terms of errors, both data sources, Google and EHR, improve accuracy for forecasts from up to 4 weeks to up to 10 weeks. In terms of correlation, results are similar to those at the national level, with high values obtained by using only historical data.</p>
          <p><xref rid="figure8" ref-type="fig">Figure 8</xref> and Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> show, for the linear approach, errors and correlation for influenza at the national and regional levels, for forecasts up to 10 weeks. In contrast to AG at the national and regional levels, in terms of errors and correlation, the most accurate results are obtained by using historical data, Google data, and EHR data.</p>
          <fig id="figure8" position="float">
            <label>Figure 8</label>
            <caption>
              <p>(A) Error values obtained at the national level for the flu and gastroenteritis for forecasts up to 10 weeks with the Elastic Net model. The solid line corresponds to the results obtained with the Elastic Net model using only historical data. The dotted line corresponds to the results obtained with the Elastic Net model using historical data and both Google and EHR data. Red indicates the results for gastroenteritis, whereas blue indicates the results for the flu. This line style and color code are used for the 4 panels of this figure. (B) Correlation values obtained at the national level. (C) Error values obtained at the regional level. (D) Correlation values obtained at the regional level. EHR: electronic health record; RMSE: root mean squared error.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Nonlinear Approach</title>
          <p><xref rid="figure9" ref-type="fig">Figure 9</xref> and Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> show, for the nonlinear approach, errors and correlation for AG at the national and regional levels, for forecasts up to 10 weeks. At the national level, in terms of errors, the lowest values are obtained by using only historical data—AR(52). In terms of correlation, for long-term forecasts, the highest values are obtained by using only historical data. At the regional level, in terms of errors, for forecasts up to 4 weeks, the lowest values are obtained by using only historical data. However, for long-term forecasts, the most accurate results are obtained by using historical data and both data sources, Google and EHR.</p>
          <p><xref rid="figure9" ref-type="fig">Figure 9</xref> and Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> show, for the nonlinear approach, errors and correlation for influenza at the national and regional levels, for forecasts up to 10 weeks. At the national level, in terms of errors and correlation, the most accurate values for forecasts up to 2 weeks are obtained by using historical data and both Google and EHR data. For forecasts from up to 3 weeks to up to 5 weeks, the most accurate estimates are obtained by using only historical data. For long-term forecasts, results are similar for both models, the one using only historical data and the one using historical data and Google and EHR data. At the regional level, for forecasts up to 4 weeks, in terms of errors, the lowest values are obtained, in most cases, by using only historical data. For long-term forecasts, the most accurate estimates are obtained with the model using historical data and both Google and EHR data. In terms of correlation, in most cases, the highest values are obtained by using historical data and both Google and EHR data.</p>
          <fig id="figure9" position="float">
            <label>Figure 9</label>
            <caption>
              <p>(A) Error values obtained at the national level for the flu and gastroenteritis for forecasts up to 10 weeks with the random forest (RF) model. The solid line corresponds to the results obtained with the RF model using only historical data. The dotted line corresponds to the results obtained with the RF model using historical data and both Google and EHR data. Red indicates the results for gastroenteritis, whereas blue indicates the results for the flu. This line style and color code are used for the 4 panels of this figure. (B) Correlation values obtained at the national level. (C) Error values obtained at the regional level. (D) Correlation values obtained at the regional level. EHR: electronic health record.</p>
            </caption>
            <graphic xlink:href="publichealth_v9i1e34982_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We adjusted a methodology developed for influenza, to accurately track AG activity. Our method is able to provide forecasts up to 10 weeks for national and regional levels and for emergency and hospitalization stays (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). To the best of our knowledge, this is a disease and a spatial resolution (French regions and hospitals) for which no forecasting approaches have been explored previously.</p>
        <p>In this study, we show that external data sources, EHR and Google, contribute to improving AG surveillance, in particular for long-term forecasts, with more important contribution from historical data. Specifically, when we use the linear approach (Elastic Net), in terms of errors at the national level, the lowest values are obtained by using historical data and both Google and EHR data. These results are consistent for forecasts from up to 1 week to up to 10 weeks (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). At the regional level, the model using only historical data is the model producing the lowest errors for short-term forecasts (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). However, for long-term forecasts, the inclusion of external data sources (Google and EHR) improves the estimates. We conducted a Diebold-Mariano test [<xref ref-type="bibr" rid="ref39">39</xref>] to assess whether the forecasts are statistically different when using only historical data or the combination of historical data, Google data, and EHR data (Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). We can see that at the national level, the estimates are statistically more accurate when using historical data and both Google and EHR data for 3-week and long-term forecasts. At the regional level, the use of external data sources produces estimates that are statistically more accurate for 7-week and long-term forecasts.</p>
        <p>As we used a method developed for influenza outbreaks, we compared the results obtained for AG with those obtained for influenza. At the national and regional levels, with the linear approach, for both short-term and long-term forecasts, the most accurate estimates are obtained with the model using historical data and external data sources (Google and EHR data). An understanding of these results can emerge from the time series analysis (<xref rid="figure7" ref-type="fig">Figure 7</xref>). We show that the seasonality is more important for AG epidemics than for influenza, resulting in historical data capable of providing more information for AG than for influenza. Nonetheless, for long-term forecasts, historical data are not sufficient and external data sources can be used to supplement them. Thus, it is important to integrate external data to improve long-term estimates.</p>
        <p>In addition to the linear approach, we conducted the same analysis with a nonlinear approach (RF). At the national level, the results differ slightly from those obtained using the linear approach. In terms of error and correlation, the model using only historical data provides more accurate estimates than the model using historical data, Google data, and EHR data. These results are consistent for real-time estimates and long-term forecasts (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). At the regional level, as with the linear approach, in terms of error for short-term forecasts, the model using only historical data produces the most accurate estimates. For long-term forecasts, the model including external data sources, Google and EHR, decreases the error. In terms of correlation, for both short-term and long-term forecasts, the model producing the highest values is the model using historical data, Google data, and EHR data. In all cases, the nonlinear approach allows us to obtain high values in terms of correlation and low values in terms of error when compared with those obtained using the linear approach. However, as seen in <xref rid="figure5" ref-type="fig">Figures 5</xref> and <xref rid="figure6" ref-type="fig">6</xref>, the nonlinear approach tends to smooth the estimates compared with those obtained using the linear approach. This can result in a decrease in error and an increase in correlation.</p>
        <p>The fact that we could only access EHR data from Rennes University Hospital, and thus from the Brittany region, prevented us from being able to quantify the added value of nation-wide EHR information. This should be evaluated in future studies by integrating EHR data from different hospitals from all the French regions. However, it is interesting that data from a hospital in Rennes can improve AG forecasting at the national level, even if, as we described previously, EHR data seem more important for the regional level.</p>
        <p>Data retrieved from Google Correlate are normalized by Google in a (frequently) distinct sample and over different time periods depending on the data request. This prenormalization can affect our results, but as shown in the study by Arena et al [<xref ref-type="bibr" rid="ref15">15</xref>], the process of dynamic training minimizes the impact of this instability.</p>
        <p>It would be interesting to test other approaches that gave good results for influenza, for example, an ensemble method that combines the power of the linear and the nonlinear approaches [<xref ref-type="bibr" rid="ref14">14</xref>] or other machine learning methods such as Support Vector Machine or neural networks. We tested a long short-term memory model to forecast gastroenteritis up to 10 weeks. We obtained root mean squared error=2.96 for real-time forecasting. We believe that these results are really promising and could be further studied in the future by developing a neural network combining long short-term memory for historical data and another neural network for external data sources such as Google data or EHR data. In addition, other methods could be tested to obtain more information from external data sources, such as transformations of the input variables. Variable transformations could be tested on external data sources to check whether we could get more information. Finally, it could be meaningful to first remove the multicollinearity of our predictive variables with traditional methods such as the Variance Inflation Factor and then select the most important variables with a stepwise regression to run a linear regression on the remaining variables.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We show that hospital data and internet search data significantly contribute to predicting AG outbreaks, in particular for long-term forecasts. The use of these external data sources in combination with historical data could supplement traditional surveillance systems. The methods we developed could help to reduce the impact of the AG peak, particularly in hospitals, by making it possible to anticipate increased activity by up to 10 weeks.</p>
        <p>We acknowledge that there is still scope for improvement. Future studies could explore the incorporation of more information from external data sources as a way to yield more robust results.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Heat maps obtained at both national and regional levels for the linear approach at 1-week, 2-week, and 3-week forecasts. We also added the correlation and errors obtained up to 10-week forecast, for the linear and nonlinear approaches, for both influenza and gastroenteritis.</p>
        <media xlink:href="publichealth_v9i1e34982_app1.docx" xlink:title="DOCX File , 3952 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACF</term>
          <def>
            <p>autocorrelation function</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AG</term>
          <def>
            <p>acute gastroenteritis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AR(52)</term>
          <def>
            <p>autoregressive model of order 52</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CDW</term>
          <def>
            <p>clinical data warehouse</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">eHOP</term>
          <def>
            <p>entrepôt de données de l’HÔPital</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PACF</term>
          <def>
            <p>partial autocorrelation function</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank the French Agence Nationale de Recherche for funding this study through the Integrating and Sharing Health Data for Research project (grant ANR-15-CE19-0024). The authors also thank the French Sentinel network and Google search engine for making their data publicly available. MS and CP were partially funded by the National Institute of General Medical Sciences of the National Institutes of Health, under award number R01GM130668. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>CP, AL, and GB conceived the study, and CP and GB obtained the data sets. CP and MS proposed the forecasting methodology. CP conducted the statistical experiments. CP and MS analyzed and interpreted the results. CP wrote the manuscript with support from MS, AL, and GB. All authors reviewed and approved the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Farthing</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Diarrhoea: a significant worldwide problem</article-title>
          <source>Int J Antimicrob Agents</source>
          <year>2000</year>
          <month>2</month>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>65</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/s0924-8579(99)00149-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Majowicz</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Scallan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Adak</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Gauci</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Henao</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Sockett</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>A common, symptom-based case definition for gastroenteritis</article-title>
          <source>Epidemiol Infect</source>
          <year>2008</year>
          <month>07</month>
          <volume>136</volume>
          <issue>7</issue>
          <fpage>886</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1017/S0950268807009375</pub-id>
          <pub-id pub-id-type="medline">17686196</pub-id>
          <pub-id pub-id-type="pii">S0950268807009375</pub-id>
          <pub-id pub-id-type="pmcid">PMC2870876</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kosek</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bern</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guerrant</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>The global burden of diarrhoeal disease, as estimated from studies published between 1992 and 2000</article-title>
          <source>Bull World Health Organ</source>
          <year>2003</year>
          <volume>81</volume>
          <issue>3</issue>
          <fpage>197</fpage>
          <lpage>204</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/12764516"/>
          </comment>
          <pub-id pub-id-type="medline">12764516</pub-id>
          <pub-id pub-id-type="pii">S0042-96862003000300010</pub-id>
          <pub-id pub-id-type="pmcid">PMC2572419</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rivière</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Baroux</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ambert-Balay</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Beaudeau</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jourdan-Da Silva</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Van Cauteren</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bounoure</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cahuzac</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Blanchon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Prazuck</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Turbelin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hanslik</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Secular trends in incidence of acute gastroenteritis in general practice, France, 1991 to 2015</article-title>
          <source>Euro Surveill</source>
          <year>2017</year>
          <month>12</month>
          <volume>22</volume>
          <issue>50</issue>
          <fpage>17-00121</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.eurosurveillance.org/content/10.2807/1560-7917.ES.2017.22.50.17-00121"/>
          </comment>
          <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2017.22.50.17-00121</pub-id>
          <pub-id pub-id-type="medline">29258648</pub-id>
          <pub-id pub-id-type="pmcid">PMC5743098</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Cauteren</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>De Valk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Vaux</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Le Strat</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Vaillant</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Burden of acute gastroenteritis and healthcare-seeking behaviour in France: a population-based study</article-title>
          <source>Epidemiol Infect</source>
          <year>2011</year>
          <month>06</month>
          <day>07</day>
          <volume>140</volume>
          <issue>4</issue>
          <fpage>697</fpage>
          <lpage>705</lpage>
          <pub-id pub-id-type="doi">10.1017/s0950268811000999</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rohayem</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Norovirus seasonality and the potential impact of climate change</article-title>
          <source>Clin Microbiol Infect</source>
          <year>2009</year>
          <month>06</month>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>524</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1198-743X(14)60440-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1469-0691.2009.02846.x</pub-id>
          <pub-id pub-id-type="medline">19604277</pub-id>
          <pub-id pub-id-type="pii">S1198-743X(14)60440-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greer</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Drews</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fisman</surname>
              <given-names>DN</given-names>
            </name>
          </person-group>
          <article-title>Why "winter" vomiting disease? Seasonality, hydrology, and Norovirus epidemiology in Toronto, Canada</article-title>
          <source>Ecohealth</source>
          <year>2009</year>
          <month>06</month>
          <day>12</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>192</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1007/s10393-009-0247-8</pub-id>
          <pub-id pub-id-type="medline">20151172</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carneiro</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Mylonakis</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Google trends: a web-based tool for real-time surveillance of disease outbreaks</article-title>
          <source>Clin Infect Dis</source>
          <year>2009</year>
          <month>11</month>
          <day>15</day>
          <volume>49</volume>
          <issue>10</issue>
          <fpage>1557</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1086/630200</pub-id>
          <pub-id pub-id-type="medline">19845471</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>Situation observed in metropolitan France for week 51 of the year 2022, from 19/12/2022 to 25/12/2022</article-title>
          <source>Sentiweb - the site of the Sentinels Network</source>
          <access-date>2022-07-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://websenti.u707.jussieu.fr/sentiweb/">https://websenti.u707.jussieu.fr/sentiweb/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Wikswo</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Barclay</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kambhampati</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shioda</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Parashar</surname>
              <given-names>UD</given-names>
            </name>
            <name name-style="western">
              <surname>Vinjé</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Near real-time surveillance of U.S. Norovirus outbreaks by the norovirus sentinel testing and tracking network - United States, August 2009-July 2015</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2017</year>
          <month>02</month>
          <day>24</day>
          <volume>66</volume>
          <issue>7</issue>
          <fpage>185</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.15585/mmwr.mm6607a1"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6607a1</pub-id>
          <pub-id pub-id-type="medline">28231235</pub-id>
          <pub-id pub-id-type="pmcid">PMC5657847</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kou</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Accurate estimation of influenza epidemics using Google search data via ARGO</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2015</year>
          <month>11</month>
          <day>24</day>
          <volume>112</volume>
          <issue>47</issue>
          <fpage>14473</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26553980"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1515373112</pub-id>
          <pub-id pub-id-type="medline">26553980</pub-id>
          <pub-id pub-id-type="pii">1515373112</pub-id>
          <pub-id pub-id-type="pmcid">PMC4664296</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Louie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zink</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Cloud-based electronic health records for real-time, region-specific influenza surveillance</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>05</month>
          <day>11</day>
          <volume>6</volume>
          <fpage>25732</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/srep25732"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep25732</pub-id>
          <pub-id pub-id-type="medline">27165494</pub-id>
          <pub-id pub-id-type="pii">srep25732</pub-id>
          <pub-id pub-id-type="pmcid">PMC4863169</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kou</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Using electronic health records and internet search information for accurate influenza forecasting</article-title>
          <source>BMC Infect Dis</source>
          <year>2017</year>
          <month>05</month>
          <day>08</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>332</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcinfectdis.biomedcentral.com/articles/10.1186/s12879-017-2424-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12879-017-2424-7</pub-id>
          <pub-id pub-id-type="medline">28482810</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12879-017-2424-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5423019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Hattab</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Clemente</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Biggerstaff</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Improved state-level influenza nowcasting in the United States leveraging Internet-based data and network approaches</article-title>
          <source>Nat Commun</source>
          <year>2019</year>
          <month>01</month>
          <day>11</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>147</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-018-08082-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-018-08082-0</pub-id>
          <pub-id pub-id-type="medline">30635558</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-018-08082-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6329822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arena</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Amoros</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Vaillant</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ambert-Balay</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chikhi-Brachet</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jourdan-Da Silva</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Varesi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Arrighi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Souty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Blanchon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Falchi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hanslik</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Acute diarrhea in adults consulting a general practitioner in France during winter: incidence, clinical characteristics, management and risk factors</article-title>
          <source>BMC Infect Dis</source>
          <year>2014</year>
          <month>10</month>
          <day>30</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>574</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcinfectdis.biomedcentral.com/articles/10.1186/s12879-014-0574-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12879-014-0574-4</pub-id>
          <pub-id pub-id-type="medline">25358721</pub-id>
          <pub-id pub-id-type="pii">s12879-014-0574-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4220050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charles</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Holman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Curns</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Parashar</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Glass</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bresee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Hospitalizations associated with rotavirus gastroenteritis in the United States, 1993-2002</article-title>
          <source>Pediatr Infect Dis J</source>
          <year>2006</year>
          <volume>25</volume>
          <issue>6</issue>
          <fpage>489</fpage>
          <lpage>93</lpage>
          <pub-id pub-id-type="doi">10.1097/01.inf.0000215234.91997.21</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wikswo</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Manikonda</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Yoder</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Gould</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Acute gastroenteritis surveillance through the National Outbreak Reporting System, United States</article-title>
          <source>Emerg Infect Dis</source>
          <year>2013</year>
          <month>08</month>
          <volume>19</volume>
          <issue>8</issue>
          <fpage>1305</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23876187"/>
          </comment>
          <pub-id pub-id-type="doi">10.3201/eid1908.130482</pub-id>
          <pub-id pub-id-type="medline">23876187</pub-id>
          <pub-id pub-id-type="pmcid">PMC3739540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amador</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vicari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Turcios-Ruiz</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Melendez</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Malek</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Aldighieri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kerin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bresee</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Glass</surname>
              <given-names>RI</given-names>
            </name>
            <name name-style="western">
              <surname>Andrus</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>Outbreak of rotavirus gastroenteritis with high mortality, Nicaragua, 2005</article-title>
          <source>Rev Panam Salud Publica</source>
          <year>2008</year>
          <month>04</month>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>277</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1590/s1020-49892008000400008</pub-id>
          <pub-id pub-id-type="medline">18505609</pub-id>
          <pub-id pub-id-type="pii">S1020-49892008000400008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kirian</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Weintraub</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Prediction of gastrointestinal disease with over-the-counter diarrheal remedy sales records in the San Francisco Bay Area</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2010</year>
          <month>07</month>
          <day>20</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>39</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-10-39"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6947-10-39</pub-id>
          <pub-id pub-id-type="medline">20646311</pub-id>
          <pub-id pub-id-type="pii">1472-6947-10-39</pub-id>
          <pub-id pub-id-type="pmcid">PMC2920250</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lopman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tate</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Esparza-Aguilar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez-Uribe</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Use of internet search data to monitor rotavirus vaccine impact in the United States, United Kingdom, and Mexico</article-title>
          <source>Open Forum Infect Dis</source>
          <year>2016</year>
          <volume>3</volume>
          <issue>suppl_1</issue>
          <fpage>771</fpage>
          <pub-id pub-id-type="doi">10.1093/ofid/ofw172.634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pelat</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Turbelin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bar-Hen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Flahault</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Valleron</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>More diseases tracked by using Google Trends</article-title>
          <source>Emerg Infect Dis</source>
          <year>2009</year>
          <month>08</month>
          <volume>15</volume>
          <issue>8</issue>
          <fpage>1327</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19751610"/>
          </comment>
          <pub-id pub-id-type="doi">10.3201/eid1508.090299</pub-id>
          <pub-id pub-id-type="medline">19751610</pub-id>
          <pub-id pub-id-type="pmcid">PMC2815981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adadi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Adadi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Berrada</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Gastroenterology meets machine learning: status quo and quo vadis</article-title>
          <source>Adv Bioinformatics</source>
          <year>2019</year>
          <month>04</month>
          <day>02</day>
          <volume>2019</volume>
          <fpage>1870975</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2019/1870975"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2019/1870975</pub-id>
          <pub-id pub-id-type="medline">31065266</pub-id>
          <pub-id pub-id-type="pmcid">PMC6466966</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ginsberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mohebbi</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Brammer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Smolinski</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Brilliant</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Detecting influenza epidemics using search engine query data</article-title>
          <source>Nature</source>
          <year>2009</year>
          <month>02</month>
          <day>19</day>
          <volume>457</volume>
          <issue>7232</issue>
          <fpage>1012</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1038/nature07634</pub-id>
          <pub-id pub-id-type="medline">19020500</pub-id>
          <pub-id pub-id-type="pii">nature07634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Butler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>When Google got flu wrong</article-title>
          <source>Nature</source>
          <year>2013</year>
          <month>02</month>
          <day>14</day>
          <volume>494</volume>
          <issue>7436</issue>
          <fpage>155</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1038/494155a</pub-id>
          <pub-id pub-id-type="medline">23407515</pub-id>
          <pub-id pub-id-type="pii">494155a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bouzillé</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Poirier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Campillo-Gimenez</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Aubert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chabot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chazard</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lavenu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cuggia</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Leveraging hospital big data to monitor flu epidemics</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2018</year>
          <month>02</month>
          <volume>154</volume>
          <fpage>153</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hal.archives-ouvertes.fr/hal-01671695"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2017.11.012</pub-id>
          <pub-id pub-id-type="medline">29249339</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(17)30155-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Poirier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lavenu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bertaud</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Campillo-Gimenez</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chazard</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cuggia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bouzillé</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Real time influenza monitoring using hospital big data in combination with machine learning methods: comparison study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2018</year>
          <month>12</month>
          <day>21</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e11361</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2018/4/e11361/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11361</pub-id>
          <pub-id pub-id-type="medline">30578212</pub-id>
          <pub-id pub-id-type="pii">v4i4e11361</pub-id>
          <pub-id pub-id-type="pmcid">PMC6320394</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>Situation observed in metropolitan France for week 01 of the year 2023, from 02/01/2023 to 08/01/2023</article-title>
          <source>Sentinelles</source>
          <year>2023</year>
          <month>01</month>
          <day>11</day>
          <access-date>2023-01-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://websenti.u707.jussieu.fr/sentiweb">http://websenti.u707.jussieu.fr/sentiweb</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <source>Google Trends</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trends.google.fr/trends/?geo=FR">https://trends.google.fr/trends/?geo=FR</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohebbi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderkam</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kodysh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schonberger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Google correlate whitepaper</article-title>
          <source>Google</source>
          <year>2011</year>
          <access-date>2018-03-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://research.google/pubs/pub41695/">https://research.google/pubs/pub41695/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Regularization and variable selection via the elastic net</article-title>
          <source>J Royal Statistical Soc B</source>
          <year>2005</year>
          <month>04</month>
          <volume>67</volume>
          <issue>2</issue>
          <fpage>301</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Regression shrinkage and selection via the lasso</article-title>
          <source>J Royal Statistical Soc B (Methodological)</source>
          <year>1996</year>
          <month>01</month>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>267</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hoerl</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Kennard</surname>
              <given-names>RW</given-names>
            </name>
          </person-group>
          <article-title>Ridge regression: biased estimation for nonorthogonal problems</article-title>
          <source>Technometrics</source>
          <year>1970</year>
          <month>02</month>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>55</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.2307/1271436</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>caret (Classification And Regression Training) R package that contains misc functions for training and plotting classification and regression models</article-title>
          <source>GitHub</source>
          <access-date>2020-05-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/topepo/caret/">https://github.com/topepo/caret/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>R Core Team</collab>
          </person-group>
          <source>R: A Language and Environment for Statistical Computing</source>
          <year>2015</year>
          <publisher-loc>Vienna, Austria</publisher-loc>
          <publisher-name>R Foundation for Statistical Computing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Machine Learning</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rabinowitz</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Comparison of ARIMA and Random Forest time series models for prediction of avian influenza H5N1 outbreaks</article-title>
          <source>BMC Bioinformatics</source>
          <year>2014</year>
          <month>08</month>
          <day>13</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>276</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-276"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-15-276</pub-id>
          <pub-id pub-id-type="medline">25123979</pub-id>
          <pub-id pub-id-type="pii">1471-2105-15-276</pub-id>
          <pub-id pub-id-type="pmcid">PMC4152592</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dudek</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Short-term load forecasting using random forests</article-title>
          <source>Intelligent Systems'2014</source>
          <year>2015</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>Classification and regression by randomForest</article-title>
          <source>R News</source>
          <year>2002</year>
          <access-date>2021-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cogns.northwestern.edu/cbmg/LiawAndWiener2002.pdf">https://cogns.northwestern.edu/cbmg/LiawAndWiener2002.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Diebold</surname>
              <given-names>FX</given-names>
            </name>
            <name name-style="western">
              <surname>Mariano</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>Comparing predictive accuracy</article-title>
          <source>J Business Econ Stat</source>
          <year>1995</year>
          <month>07</month>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>253</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1080/07350015.1995.10524599</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
