<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id><journal-id journal-id-type="publisher-id">publichealth</journal-id><journal-id journal-id-type="index">9</journal-id><journal-title>JMIR Public Health and Surveillance</journal-title><abbrev-journal-title>JMIR Public Health Surveill</abbrev-journal-title><issn pub-type="epub">2369-2960</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e53719</article-id><article-id pub-id-type="doi">10.2196/53719</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Handling Missing Data in COVID-19 Incidence Estimation: Secondary Data Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Pham</surname><given-names>Hai-Thanh</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Do</surname><given-names>Toan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Baek</surname><given-names>Jonggyu</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Nguyen</surname><given-names>Cong-Khanh</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pham</surname><given-names>Quang-Thai</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nguyen</surname><given-names>Hoa L</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Goldberg</surname><given-names>Robert</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pham</surname><given-names>Quang Loc</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Giang</surname><given-names>Le Minh</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>School of Preventive Medicine and Public Health, Hanoi Medical University</institution>, <addr-line>1 Ton That Tung Street, Kim Lien Ward, Dong Da District</addr-line><addr-line>Hanoi</addr-line>, <country>Vietnam</country></aff><aff id="aff2"><institution>UMass Chan Medical School, University of Massachusetts Medical School</institution>, <addr-line>Worcester</addr-line><addr-line>MA</addr-line>, <country>United States</country></aff><aff id="aff3"><institution>National Institute of Hygiene and Epidemiology</institution>, <addr-line>Hanoi</addr-line>, <country>Vietnam</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Kim</surname><given-names>Ju-Hyung</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Nooghabi</surname><given-names>Mehdi Jabbari</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mohanty</surname><given-names>Sachi Nandan</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Hai-Thanh Pham, MSc, School of Preventive Medicine and Public Health, Hanoi Medical University, 1 Ton That Tung Street, Kim Lien Ward, Dong Da District, Hanoi, 100000, Vietnam, 84 368-577-4236; <email>thanh.ph.hmu@gmail.com</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>20</day><month>8</month><year>2024</year></pub-date><volume>10</volume><elocation-id>e53719</elocation-id><history><date date-type="received"><day>17</day><month>10</month><year>2023</year></date><date date-type="rev-recd"><day>05</day><month>06</month><year>2024</year></date><date date-type="accepted"><day>12</day><month>06</month><year>2024</year></date></history><copyright-statement>&#x00A9; Hai-Thanh Pham, Toan Do, Jonggyu Baek, Cong-Khanh Nguyen, Quang-Thai Pham, Hoa L Nguyen, Robert Goldberg, Quang Loc Pham, Le Minh Giang. Originally published in JMIR Public Health and Surveillance (<ext-link ext-link-type="uri" xlink:href="https://publichealth.jmir.org">https://publichealth.jmir.org</ext-link>), 20.8.2024. 
</copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://publichealth.jmir.org">https://publichealth.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://publichealth.jmir.org/2024/1/e53719"/><abstract><sec><title>Background</title><p>The COVID-19 pandemic has revealed significant challenges in disease forecasting and in developing a public health response, emphasizing the need to manage missing data from various sources in making accurate forecasts.</p></sec><sec><title>Objective</title><p>We aimed to show how handling missing data can affect estimates of the COVID-19 incidence rate (CIR) in different pandemic situations.</p></sec><sec sec-type="methods"><title>Methods</title><p>This study used data from the COVID-19/SARS-CoV-2 surveillance system at the National Institute of Hygiene and Epidemiology, Vietnam. We separated the available data set into 3 distinct periods: zero COVID-19, transition, and new normal. We randomly removed 5% to 30% of the data, in increments of 5%, under a missing completely at random mechanism at each time point in the variable daily caseload of COVID-19. 
We selected 7 analytical methods to assess the effects of handling missing data and calculated statistical and epidemiological indices to measure the effectiveness of each method.</p></sec><sec sec-type="results"><title>Results</title><p>Our study examined missing data imputation performance across 3 study time periods: zero COVID-19 (n=3149), transition (n=1290), and new normal (n=9288). Imputation analyses showed that K-nearest neighbor (KNN) had the lowest mean absolute percentage change (APC) in CIR across the range (5% to 30%) of missing data. For instance, with 15% missing data, KNN resulted in 10.6%, 10.6%, and 9.7% average bias across the zero COVID-19, transition, and new normal periods, compared to 39.9%, 51.9%, and 289.7% with the maximum likelihood method. The autoregressive integrated moving average model showed the greatest mean APC in the mean number of confirmed cases of COVID-19 during each COVID-19 containment cycle (CCC) when we imputed the missing data in the zero COVID-19 period, rising from 226.3% at the 5% missing level to 6955.7% at the 30% missing level. Imputing missing data with median imputation methods had the lowest bias in the average number of confirmed cases in each CCC at all levels of missing data. In detail, in the 20% missing scenario, while median imputation had an average bias of 16.3% for confirmed cases in each CCC, which was lower than the KNN figure, maximum likelihood imputation showed a bias on average of 92.4% for confirmed cases in each CCC, which was the highest figure. 
During the new normal period in the 25% and 30% missing data scenarios, KNN imputation had average biases for CIR and confirmed cases in each CCC ranging from 21% to 32% for both, while maximum likelihood and moving average imputation showed biases on average above 250% for both CIR and confirmed cases in each CCC.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our study emphasizes the importance of understanding that the specific imputation method used by investigators should be tailored to the specific epidemiological context and data collection environment to ensure reliable estimates of the CIR.</p></sec></abstract><kwd-group><kwd>imputation method</kwd><kwd>COVID-19 incidence rate</kwd><kwd>crude bias</kwd><kwd>crude RMSE</kwd><kwd>root mean square error</kwd><kwd>percentage change</kwd><kwd>pandemic</kwd><kwd>Vietnam</kwd><kwd>surveillance</kwd><kwd>population health</kwd><kwd>analytical method</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Surveillance data are vital for public health policy and resource allocation [<xref ref-type="bibr" rid="ref1">1</xref>]. During the COVID-19 pandemic, the rapid analysis of incomplete data led to potential biases, affecting our understanding of COVID-19 knowledge, attitudes, and behaviors [<xref ref-type="bibr" rid="ref2">2</xref>]. Additionally, a study using US infectious disease surveillance data demonstrated that missing data can impact measured health disparities, emphasizing the need to consider this limitation when interpreting disparity metrics [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>The absence of standardized and systematically collected surveillance data during the COVID-19 outbreak has necessitated the use of robust statistical tools and approaches to address these data gaps. 
Despite the availability of various analytical techniques, the application of statistical modeling processes has been limited [<xref ref-type="bibr" rid="ref4">4</xref>]. Moreover, when imputation methods have been used, they have often lacked detailed descriptions and transparency [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Addressing the problem of missing data in public health surveillance systems requires system-level solutions, such as collecting more complete laboratory data, improving data linkage, and designing more efficient data collection procedures [<xref ref-type="bibr" rid="ref3">3</xref>]. The analytical challenges posed by the current pandemic present an important opportunity to assess the utility of available statistical methods. Regardless of data quality, missing data and suboptimal analytical strategies can reduce a study&#x2019;s statistical power and lead to biased estimates, resulting in erroneous conclusions. Robust statistical methods are crucial to enhance future data collection efforts, data interpretation, and their clinical and public health implications [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Gaps in the existing literature lie in the inadequate use of statistical modeling approaches to address the problem of missing data in disease and risk factor monitoring systems, particularly during public health emergencies such as the COVID-19 pandemic [<xref ref-type="bibr" rid="ref8">8</xref>]. This shortfall is critical because missing data can significantly hinder the accurate monitoring of disease trends and the formulation of effective public health policies [<xref ref-type="bibr" rid="ref9">9</xref>]. While various imputation methods exist, their application in this context has been limited, leading to uncertainties in disease trend forecasting and policy recommendations. 
These limitations can result in skewed data interpretations, which may, in turn, affect resource allocation, emergency response strategies, and overall public health outcomes.</p><p>In the present study, we used several theoretical approaches based on statistical modeling and epidemiological concepts to address the challenge of using different statistical methods for handling missing data in the interpretation of community surveillance information collected during different pandemic periods. We evaluated the performance of several imputation strategies to determine the best approaches for dealing with missing data in disease monitoring, showing how handling missing data can affect estimates of the COVID-19 incidence rate (CIR) in different pandemic situations.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Context and Data Source</title><p>This study used data collected in Bac Ninh Province, Vietnam, during the calendar year 2021 from the surveillance system for patients with COVID-19/SARS-CoV-2 who were admitted to the National Institute of Hygiene and Epidemiology in Hanoi, Vietnam.</p><p>The database included information on 13,727 patients with COVID-19 collected from the beginning of the 2021 outbreak in Bac Ninh Province, from January 1, 2021, to December 31, 2021, without any missing data. Based on the information contained in this data set, and because we wanted to restrict our study population to cases that could be transmitted to the broader community, we calculated the CIR only for confirmed cases of COVID-19 (n=10,599; this represents 77% of the data set) that were diagnosed in each community from each district in Bac Ninh Province (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>We decided to focus exclusively on community cases to understand the transmission dynamics in the broader community. 
We focused on 3 specific variables in the data set: the date of each community-acquired case of COVID-19 that was forwarded to the surveillance system, the community code, and the number of daily cases at the community level (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p></sec><sec id="s2-2"><title>Overview</title><p>We conducted a simulation to calculate various statistical and epidemiological indices of this community epidemic, assessing the effectiveness of different methods for handling missing data across differing missingness proportions and pandemic periods for each of the 7 missing-data analytic methods. The simulation steps began with generating a reference data set by separating the data set into different periods. Subsequently, for each missingness proportion in each period, steps 2 through 4 were repeated, during which statistical and epidemiological indices were calculated for the 7 missing-data handling methods.</p></sec><sec id="s2-3"><title>Step 1: Separating by Period</title><p>We separated the COVID-19 pandemic that was occurring in Bac Ninh Province into 3 distinct time periods using the following working definitions: the first period, the zero COVID-19 period, ran from January 1 to July 4, 2021. This was when the local government had tightened prevention policies and the primary goal was to stop the community transmission of COVID-19. During this period, there were multiple short-range waves of COVID-19 outbreaks, with the peak CIR ranging from 150 to 250 cases daily [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>The next period, the transition period, took place between July 5 and October 22, 2021. During this period, the local government used a flexible pandemic policy with the goal of controlling community transmission of COVID-19 and minimizing the importation of new cases from affected provinces while increasing the population level of COVID-19 vaccine coverage. 
During this period, the highest CIR was more than 200 cases per day, but there were many days in Bac Ninh Province with no notification of cases (CIR=0), with the longest run of zero-notification days being more than 2 weeks [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>The final study period, the new normal, ran from October 23, 2021, until the end of the study on December 31, 2021. During this period, the primary goal of public health officials was to open social facilities and terminate all isolation policies. The CIR in this period fluctuated, with multiple long-range waves of outbreaks; during the highest peak, there were more than 600 daily cases of COVID-19 [<xref ref-type="bibr" rid="ref10">10</xref>].</p></sec><sec id="s2-4"><title>Step 2: Generating Simulated Data Sets</title><p>We assumed that there were values missing completely at random in our study, so that the data values missing in our simulation data sets were unrelated to any observed or unobserved data in the data set. In other words, the missing data points did not depend on the values of other variables or the values of the missing variable itself. Accordingly, we randomly varied the missing data percentage from 5% to 30%, in increments of 5%, for each time point for the variable &#x201C;cases per day at the community level.&#x201D; This was defined as the total number of confirmed cases of COVID-19 that were diagnosed and reported daily at each community in Bac Ninh Province [<xref ref-type="bibr" rid="ref8">8</xref>], resulting in 6 levels of cutoff percentages for missing data sets during each of the 3 distinct periods. 
We used the <italic>missMethods</italic> R package to generate missing values based on previous research that has shown the effectiveness of generating missing values in data sets [<xref ref-type="bibr" rid="ref11">11</xref>]; 18 simulated databases were created in our study.</p></sec><sec id="s2-5"><title>Step 3: Handling the Missing Data</title><p>The methods for handling missing data were based on a previous literature review of the techniques used in ecological data sets [<xref ref-type="bibr" rid="ref12">12</xref>]. We selected 7 methods that we deemed to be suitable for imputing missing values from the number of daily cases of COVID-19 occurring in each study community.</p><sec id="s2-5-1"><title>Backfill Imputation</title><p>We used the number of daily cases from the previous day for each community unit as the value for imputation for the missing values of that community. If there were no cases on the previous day to impute, we assumed a missing value of 0 because when no data were available from the previous day, assuming a value of 0 was a conservative approach, indicating no new cases reported. We used the &#x201C;na.locf()&#x201D; function in the <italic>zoo</italic> package of R to conduct this imputation process [<xref ref-type="bibr" rid="ref13">13</xref>].</p></sec><sec id="s2-5-2"><title>Moving Average</title><p>We used the mean number of daily cases of COVID-19 over the last 14 days as the value for imputation. The cutoff time of 14 days served as the reference for the minimum time for a COVID-19 containment cycle (CCC) [<xref ref-type="bibr" rid="ref10">10</xref>]. 
We created a function to carry out this process.</p></sec><sec id="s2-5-3"><title>Median Imputation</title><p>We created a function in R to use the value of the number of daily cases of COVID-19 during the last 14 days in each community as the reference to find the median for imputing missing values for that community.</p></sec><sec id="s2-5-4"><title>Maximum Likelihood</title><p>We used maximum likelihood estimation (MLE), which is based on a normal distribution. We created a function to conduct this process. First, we calculated the MLE for the mean (&#x03BC;) and SD (&#x03C3;) of the last 14 days of nonmissing values in the input variable <italic>x</italic>. Then, for each missing value, we randomly sampled a value from a normal distribution with mean (&#x03BC;) and SD (&#x03C3;), effectively replacing the missing value.</p></sec><sec id="s2-5-5"><title>Linear Interpolation</title><p>We used the &#x201C;na_interpolation()&#x201D; function in the <italic>imputeTS</italic> package of R [<xref ref-type="bibr" rid="ref14">14</xref>]. Missing values were replaced by values estimated by linear interpolation, which created a linear relationship between neighboring known data points (the last day and the next day).</p></sec><sec id="s2-5-6"><title>Autoregressive Integrated Moving Average Model</title><p>We used the &#x201C;auto.arima()&#x201D; function in the <italic>forecast</italic> package of R for calculating imputed missing values [<xref ref-type="bibr" rid="ref15">15</xref>]. The autoregressive integrated moving average (ARIMA) model combines 3 key components: <italic>AR</italic> (the &#x201C;autoregressive&#x201D; term), <italic>I</italic> (the &#x201C;differencing&#x201D; term), and <italic>MA</italic> (the &#x201C;moving average&#x201D; term). The <italic>AR</italic> term refers to the past values used for forecasting the next value while the <italic>MA</italic> term is used to define the number of past forecast errors used to predict future values. 
The order of &#x201C;differencing&#x201D; specifies the number of times the differencing operation is performed on a series to make it stationary. Under the default settings, the maximum number of historical observations was set to the last 5 days. The ARIMA model subsequently determined the order of these components (ie, values from 1 to 5 previous days could be related to the current value), and imputation values were chosen through data analysis and model selection techniques.</p></sec><sec id="s2-5-7"><title>K-Nearest Neighbor Imputation</title><p>We used the closest data points to the one with missing values. In our study, we used the &#x201C;kNN()&#x201D; function in the <italic>VIM</italic> package of R to fill in missing daily COVID-19 case counts at the community level by K-nearest neighbor (KNN) imputation [<xref ref-type="bibr" rid="ref16">16</xref>]. This method estimates missing values based on nearby data points. We applied KNN with a set number of neighbors, in our example 14 days, representing the minimum time for a CCC in each community [<xref ref-type="bibr" rid="ref10">10</xref>].</p></sec></sec><sec id="s2-6"><title>Step 4: Estimating the Effectiveness</title><p>To illustrate the efficacy of various missing data handling methods in estimating the CIR, we implemented the 7 imputation techniques to address missing data during different study periods and levels of missing data.</p><p>On the statistical side, to assess the extent to which these missing data handling methods mitigated the effects on estimating the CIR, we examined bias and the root mean square error (RMSE) resulting from direct comparisons between the imputed and original values of the daily CIR for a population of 1,000,000 people. 
We computed the mean absolute crude bias (<inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>C</mml:mi><mml:mi>B</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>) and the mean crude RMSE (<inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>) as indicators of performance [<xref ref-type="bibr" rid="ref8">8</xref>]. To quantify the alterations in CIR between the original and the imputed data sets, we employed the mean absolute percentage change (APC) in the CIR, denoted as <inline-formula><mml:math id="ieqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>C</mml:mi><mml:mi>I</mml:mi><mml:mi>R</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p><p>From the epidemiological perspective, we used the average number of confirmed cases in each CCC as the reference index to measure the effectiveness of the imputation data. The CCC consisted of several nonpharmacological control strategies aimed at managing the COVID-19 pandemic within each community in Bac Ninh Province [<xref ref-type="bibr" rid="ref10">10</xref>]. 
We used the mean APC of the mean of the average confirmed cases of COVID-19 for each CCC, referred to as <inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>, to discern differences in confirmed cases for each CCC at the community level between the original and imputed data sets (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p><p>R (version 4.2.2; R Foundation for Statistical Computing) was used for all data analyses that were carried out.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>The study received approval in accordance with decision 4326/QD-DHYHN by the Institutional Review Board of Hanoi Medical University. All methods were conducted in compliance with the committee&#x2019;s guidelines and regulations. We received permission for all the data sets in this study from the Vietnam National Institute of Hygiene and Epidemiology for use and analysis. All personal information and identifiers were removed from the data set prior to analysis.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Zero COVID-19 Period</title><p><xref ref-type="fig" rid="figure1">Figure 1</xref> shows the results of the imputation methods used to address missing data in the context of the CIR during the zero COVID-19 period. Among these methods, KNN imputation showed the lowest mean ACB and mean crude RMSE values from 5% to 20% missing-data levels. 
In the 25% to 30% missing-data levels, while KNN imputation and median imputation consistently yielded lower mean ACBs than the other methods, linear interpolation imputation had the lowest mean crude RMSE.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Mean absolute crude bias and mean crude root mean square error (RMSE) when using different imputation methods during the zero COVID-19 period.  <inline-formula><mml:math id="ieqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>C</mml:mi><mml:mi>B</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>: the mean absolute crude bias of the COVID-19 incidence rate; ARIMA: autoregressive integrated moving average;  <inline-formula><mml:math id="ieqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>: mean crude RMSE of the COVID-19 incidence rate.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="publichealth_v10i1e53719_fig01.png"/></fig><p><xref ref-type="table" rid="table1">Table 1</xref> provides an assessment of the mean APC in the CIR and the average number of confirmed cases in each CCC during the zero COVID-19 period using the 7 imputation methods to address missing data. Median imputation and KNN imputation consistently exhibited the lowest mean APC values for both CIR and for the average number of confirmed cases during each CCC. The moving average imputation method followed as the second-lowest performer for APC in CIR, with the mean APC increasing gradually as the level of missing data increased. 
Backfill imputation was the second-lowest performer in APC in terms of the average number of confirmed cases during each CCC, with the mean APC value rising nearly similarly to the median imputation results. Both backfill imputation and median imputation had APCs on average for the number of confirmed cases of COVID-19 during each CCC higher than KNN imputation at all levels of missing data. In contrast, the linear interpolation imputation method consistently exhibited the highest mean APC values across the specified levels of missing data. Lastly, the ARIMA model imputation and maximum likelihood imputation methods demonstrated the second-highest mean APC values when missing data levels increased; the ARIMA model imputation had the highest APC on average for the number of confirmed cases of COVID-19 during each CCC.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Mean absolute percentage change in the daily COVID-19 incidence rate (<inline-formula><mml:math id="ieqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>C</mml:mi><mml:mi>I</mml:mi><mml:mi>R</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) and in the mean of the average of confirmed cases of COVID-19 during each COVID-19 containment cycle (<inline-formula><mml:math id="ieqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) when using different imputation methods during the zero COVID-19 period.</p></caption><table id="table1" frame="hsides" 
rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Imputation methods</td><td align="left" valign="bottom" colspan="6">Level of missing data, mean (SE)</td></tr><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom">5%</td><td align="left" valign="bottom">10%</td><td align="left" valign="bottom">15%</td><td align="left" valign="bottom">20%</td><td align="left" valign="bottom">25%</td><td align="left" valign="bottom">30%</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8"><bold>Backfill imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn9"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">11.1 (8.2)</td><td align="left" valign="top">21.8 (11.1)</td><td align="left" valign="top">33.0 (13.3)</td><td align="left" valign="top">36.0 (13.3)</td><td align="left" valign="top">48.5 (19.4)</td><td align="left" valign="top">51.7 (19.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">4.5 (1.6)</td><td align="left" valign="top">13.1 (2.6)</td><td align="left" valign="top">19.6 (3.0)</td><td align="left" valign="top">28.9 (4.4)</td><td align="left" valign="top">35.6 (4.7)</td><td align="left" valign="top">44.3 (5.4)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Moving average imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn11"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">8.3 (2.0)</td><td align="left" valign="top">14.0 (2.4)</td><td align="left" valign="top">23.8 (3.9)</td><td align="left" valign="top">28.9 (4.8)</td><td align="left" valign="top">37.2 (6.5)</td><td align="left" valign="top">39.3 (6.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">24.8 (8.3)</td><td align="left" valign="top">57.6 (15.8)</td><td align="left" valign="top">77.1 (23.6)</td><td align="left" valign="top">215.3 (76.1)</td><td align="left" valign="top">244.2 (78.4)</td><td align="left" valign="top">269.0 (80.4)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Median imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn13"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.9 (1.0)</td><td align="left" valign="top">7.6 (1.4)</td><td align="left" valign="top">13.1 (2.2)</td><td align="left" valign="top">16.8 (2.6)</td><td align="left" valign="top">21.1 (3.1)</td><td align="left" valign="top">24.5 (3.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn14"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">4.3 (1.3)</td><td align="left" valign="top">14.3 (4.9)</td><td align="left" valign="top">18.6 (5.4)</td><td align="left" valign="top">64.5 (43.1)</td><td align="left" valign="top">36.0 (11.8)</td><td align="left" valign="top">42.5 (11.9)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Maximum likelihood imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn15"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">13.2 (4.2)</td><td align="left" valign="top">24.1 (7.2)</td><td align="left" valign="top">41.3 (10.7)</td><td align="left" valign="top">39.9 (8.0)</td><td align="left" valign="top">45.9 (7.6)</td><td align="left" valign="top">53.5 (11.0)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn16"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">24.8 (8.3)</td><td align="left" valign="top">57.6 (15.8)</td><td align="left" valign="top">77.1 (23.6)</td><td align="left" valign="top">215.3 (76.1)</td><td align="left" valign="top">244.2 (78.4)</td><td align="left" valign="top">269.0 (80.4)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Linear interpolation imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn17"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">15.5 (12.8)</td><td align="left" valign="top">26.9 (14.3)</td><td align="left" valign="top">33.6 (14.6)</td><td align="left" valign="top">39.5 (14.9)</td><td align="left" valign="top">48.8 (18.5)</td><td align="left" valign="top">56.2 (19.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn18"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">6.1 (2.2)</td><td align="left" valign="top">18.1 (3.8)</td><td align="left" valign="top">24.3 (4.0)</td><td align="left" valign="top">37.2 (6.1)</td><td align="left" valign="top">49.8 (6.6)</td><td align="left" valign="top">56.9 (7.0)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Autoregressive integrated moving average model imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn19"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">10.2 (1.4)</td><td align="left" valign="top">17.5 (2.4)</td><td align="left" valign="top">27.5 (3.7)</td><td align="left" valign="top">36.5 (4.9)</td><td align="left" valign="top">46.5 (6.4)</td><td align="left" valign="top">53.9 (7.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn20"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">226.3 (27.0)</td><td align="left" valign="top">544.4 (51.6)</td><td align="left" valign="top">1473.9 (238.7)</td><td align="left" valign="top">3295.6 (434.8)</td><td align="left" valign="top">5126.4 (551.1)</td><td align="left" valign="top">6955.7 (622.4)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>K-nearest neighbor imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn21"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.7 (1.0)</td><td align="left" valign="top">6.9 (1.4)</td><td align="left" valign="top">10.6 (1.7)</td><td align="left" valign="top">10.3 (1.4)</td><td align="left" valign="top">15.8 (1.9)</td><td align="left" valign="top">17.8 (2.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn22"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.6 (0.7)</td><td align="left" valign="top">9.3 (1.7)</td><td align="left" valign="top">14.1 (2.1)</td><td align="left" valign="top">19.9 (2.7)</td><td align="left" valign="top">23.4 (3.4)</td><td align="left" valign="top">29.0 (3.2)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Transition Period</title><p><xref ref-type="fig" rid="figure2">Figure 2</xref> shows the results of the different imputation methods used to address missing data in the context of the CIR during the transition period. The ARIMA model and KNN imputation methods consistently demonstrated the lowest mean ACB across all levels of missing data; the ARIMA model and median imputation methods had the same results in terms of the mean ACB and mean crude RMSE. With regard to the mean crude RMSE, the moving average and ARIMA model imputation methods consistently yielded lower values than the other methods across varying levels of missing data. On the other hand, the maximum likelihood imputation method generally resulted in higher mean ACBs and mean crude RMSEs compared with alternative methods. The backfill imputation method exhibited the second-highest mean crude RMSE, particularly at the 20% to 30% levels of missing data.</p><p><xref ref-type="table" rid="table2">Table 2</xref> presents an overview of the 7 imputation methods used to address missing data in the CIR and the average number of confirmed cases during each CCC. The median and ARIMA model imputation methods consistently displayed relatively lower mean APC values for both the CIR and average number of confirmed cases in each CCC than the other analytic methods. The backfill imputation and KNN imputation methods provided the second-lowest mean APC values as the level of missing data increased. 
In contrast, the maximum likelihood and moving average imputation methods displayed comparatively higher mean APC values than the other methods of imputation.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The mean absolute crude bias and mean crude root mean square error (RMSE) when using different imputation methods during the transition period.  <inline-formula><mml:math id="ieqn23"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>C</mml:mi><mml:mi>B</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>: the mean absolute crude bias of the COVID-19 incidence rate; ARIMA: autoregressive integrated moving average; <inline-formula><mml:math id="ieqn24"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>: mean crude RMSE of the COVID-19 incidence rate.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="publichealth_v10i1e53719_fig02.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Mean absolute percentage change in the daily COVID-19 incidence rate (<inline-formula><mml:math id="ieqn25"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>C</mml:mi><mml:mi>I</mml:mi><mml:mi>R</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) and in the mean of the average number of confirmed cases of COVID-19 during each COVID-19 containment cycle (<inline-formula><mml:math id="ieqn26"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) when using different imputation methods during the transition period.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Imputation methods</td><td align="left" valign="bottom" colspan="6">Level of missing data, mean (SE)</td></tr><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom">5%</td><td align="left" valign="bottom">10%</td><td align="left" valign="bottom">15%</td><td align="left" valign="bottom">20%</td><td align="left" valign="bottom">25%</td><td align="left" valign="bottom">30%</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8"><bold>Backfill imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn27"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">2.8 (1.1)</td><td align="left" valign="top">12.3 (6.6)</td><td align="left" valign="top">15.9 (6.7)</td><td align="left" valign="top">16.7 (6.7)</td><td align="left" valign="top">19.1 (6.8)</td><td align="left" valign="top">26.0 (8.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math 
id="ieqn28"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">8.7 (2.4)</td><td align="left" valign="top">16.7 (3.3)</td><td align="left" valign="top">24.6 (4.0)</td><td align="left" valign="top">31.6 (4.4)</td><td align="left" valign="top">40.7 (5.3)</td><td align="left" valign="top">48.9 (5.3)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Moving average imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn29"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">11.4 (2.4)</td><td align="left" valign="top">20.0 (3.8)</td><td align="left" valign="top">30.4 (5.7)</td><td align="left" valign="top">31.4 (5.6)</td><td align="left" valign="top">34.9 (6.3)</td><td align="left" valign="top">42.7 (7.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn30"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">30.1 (3.4)</td><td align="left" valign="top">33.7 (4.4)</td><td align="left" valign="top">49.7 (11.4)</td><td align="left" valign="top">98.2 (17.8)</td><td align="left" valign="top">118.7 (19.0)</td><td align="left" valign="top">167.0 (25.3)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Median imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn31"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.0 (1.3)</td><td align="left" valign="top">3.6 (1.3)</td><td align="left" valign="top">6.6 (1.9)</td><td align="left" valign="top">8.7 (2.1)</td><td align="left" valign="top">10.6 (2.3)</td><td align="left" valign="top">12.4 (2.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn32"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.8 (1.4)</td><td align="left" valign="top">9.4 (2.6)</td><td align="left" valign="top">13.5 (3.0)</td><td align="left" valign="top">16.3 (3.2)</td><td align="left" valign="top">22.8 (3.8)</td><td align="left" valign="top">25.7 (3.9)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Maximum likelihood imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn33"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">18.6 (3.9)</td><td align="left" valign="top">32.6 (9.7)</td><td align="left" valign="top">51.9 (11.1)</td><td align="left" valign="top">54.4 (11.6)</td><td align="left" valign="top">58.3 (14.0)</td><td align="left" valign="top">48.8 (11.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn34"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">31.6 (3.6)</td><td align="left" valign="top">37.5 (6.1)</td><td align="left" valign="top">57.3 (12.9)</td><td align="left" valign="top">92.4 (19.3)</td><td align="left" valign="top">134.0 (22.3)</td><td align="left" valign="top">158.4 (24.9)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Linear interpolation imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn35"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">8.4 (3.6)</td><td align="left" valign="top">16.4 (5.4)</td><td align="left" valign="top">22.4 (7.5)</td><td align="left" valign="top">24.0 (7.5)</td><td align="left" valign="top">28.4 (7.8)</td><td align="left" valign="top">32.0 (9.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn36"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">9.8 (2.0)</td><td align="left" valign="top">18.8 (2.8)</td><td align="left" valign="top">25.5 (3.3)</td><td align="left" valign="top">30.8 (3.6)</td><td align="left" valign="top">37.2 (4.2)</td><td align="left" valign="top">44.4 (4.4)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>Autoregressive integrated moving average model imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn37"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.0 (1.3)</td><td align="left" valign="top">3.7 (1.3)</td><td align="left" valign="top">6.7 (1.9)</td><td align="left" valign="top">8.7 (2.1)</td><td align="left" valign="top">10.6 (2.3)</td><td align="left" valign="top">12.4 (2.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn38"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">3.8 (1.4)</td><td align="left" valign="top">9.4 (2.6)</td><td align="left" valign="top">13.5 (3.0)</td><td align="left" valign="top">16.3 (3.2)</td><td align="left" valign="top">22.8 (3.8)</td><td align="left" valign="top">25.7 (3.9)</td></tr><tr><td align="left" valign="top" colspan="8"><bold>K-nearest neighbor imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn39"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">5.9 (2.0)</td><td align="left" valign="top">5.4 (1.7)</td><td align="left" valign="top">10.6 (2.5)</td><td align="left" valign="top">9.7 (2.0)</td><td align="left" valign="top">17.7 (3.9)</td><td align="left" valign="top">16.9 (3.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn40"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">5.6 (1.9)</td><td align="left" valign="top">8.8 (2.5)</td><td align="left" valign="top">13.5 (2.8)</td><td align="left" valign="top">17.0 (3.3)</td><td align="left" valign="top">12.7 (2.8)</td><td align="left" valign="top">22.3 (3.5)</td></tr></tbody></table></table-wrap></sec><sec id="s3-3"><title>New Normal Period</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates the mean ACB and mean crude RMSE of the 7 imputation methods used to address missing data in the CIR during the new normal period. Both the backfill and linear interpolation imputation methods consistently demonstrated the lowest mean ACB across all levels of missing data. The ARIMA model imputation and KNN imputation methods provided the second-lowest mean ACB and mean crude RMSE compared with the other analytic methods across different levels of missing data. On the other hand, the maximum likelihood and moving average imputation methods showed the highest mean ACB and mean crude RMSE as the level of missing data increased.</p><p><xref ref-type="table" rid="table3">Table 3</xref> displays the mean APC between the original and imputed data sets when we addressed varying levels of missing data in the CIR and in the average number of confirmed cases in each CCC during the new normal period. 
Three statistical methods, namely the backfill, linear interpolation, and KNN imputation methods, consistently exhibited relatively lower mean APC values compared with the other imputation methods. While the ARIMA model imputation method provided the second-lowest mean APC in CIR values as the level of missing data increased, median imputation had the second-lowest mean APC in terms of the average number of confirmed cases in each CCC at all levels of missing data. In contrast, the maximum likelihood and moving average imputation methods consistently displayed higher mean APC values in the CIR and in the average number of confirmed cases during each CCC than the other methods of imputing missing data.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Mean absolute crude bias and mean crude root mean square error (RMSE) when using different imputation methods during the new normal period.  <inline-formula><mml:math id="ieqn41"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>C</mml:mi><mml:mi>B</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>: the mean absolute crude bias of the COVID-19 incidence rate; ARIMA: autoregressive integrated moving average; <inline-formula><mml:math id="ieqn42"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mrow><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:math></inline-formula>: mean crude RMSE of the COVID-19 incidence rate.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="publichealth_v10i1e53719_fig03.png"/></fig><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Mean absolute percentage change in the daily COVID-19 incidence rate (<inline-formula><mml:math 
id="ieqn43"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula>) and in the mean of the average number of confirmed cases of COVID-19 during each COVID-19 containment cycle (<inline-formula><mml:math id="ieqn44"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) when using different imputation methods during the new normal period.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Imputation methods</td><td align="left" valign="bottom" colspan="6">Level of missing data, mean (SE)</td></tr><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom">5%</td><td align="left" valign="bottom">10%</td><td align="left" valign="bottom">15%</td><td align="left" valign="bottom">20%</td><td align="left" valign="bottom">25%</td><td align="left" valign="bottom">30%</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7"><bold>Backfill imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn45"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">7.9 (5.7)</td><td align="left" valign="top">15.6 (8.1)</td><td align="left" valign="top">19.8 (9.6)</td><td align="left" valign="top">23.9 (10.1)</td><td align="left" valign="top">28.9 (10.2)</td><td align="left" valign="top">39.3 (12.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn46"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">8.3 (1.8)</td><td align="left" valign="top">14.1 (2.2)</td><td align="left" valign="top">21.5 (3.1)</td><td align="left" valign="top">29.9 (4.0)</td><td align="left" valign="top">38.4 (5.1)</td><td align="left" valign="top">45.8 (5.8)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>Moving average imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn47"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">80.9 (30.0)</td><td align="left" valign="top">189.7 (79.2)</td><td align="left" valign="top">301.2 (117.2)</td><td align="left" valign="top">390.3 (144.4)</td><td align="left" valign="top">491.7 (194.2)</td><td align="left" valign="top">578.1 (212.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn48"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">54.6 (6.6)</td><td align="left" valign="top">102.2 (11.3)</td><td align="left" valign="top">142.7 (14.8)</td><td align="left" valign="top">220.7 (17.9)</td><td align="left" valign="top">259.7 (19.4)</td><td align="left" valign="top">303.2 (21.4)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>Median imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn49"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">58.5 (25.0)</td><td align="left" valign="top">134.7 (65.0)</td><td align="left" valign="top">215.0 (89.8)</td><td align="left" valign="top">279.8 (111.6)</td><td align="left" valign="top">359.3 (152.0)</td><td align="left" valign="top">439.1 (171.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn50"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">25.5 (5.2)</td><td align="left" valign="top">45.4 (8.3)</td><td align="left" valign="top">66.2 (10.6)</td><td align="left" valign="top">104.3 (14.9)</td><td align="left" valign="top">124.2 (16.6)</td><td align="left" valign="top">158.1 (20.8)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>Maximum likelihood imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn51"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">92.1 (33.8)</td><td align="left" valign="top">213.8 (89.7)</td><td align="left" valign="top">289.7 (107.0)</td><td align="left" valign="top">321.3 (141.7)</td><td align="left" valign="top">472.5 (187.5)</td><td align="left" valign="top">605.7 (235.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn52"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">58.6 (7.6)</td><td align="left" valign="top">105.5 (11.5)</td><td align="left" valign="top">145.0 (15.8)</td><td align="left" valign="top">221.8 (19.5)</td><td align="left" valign="top">262.5 (20.9)</td><td align="left" valign="top">313.7 (24.2)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>Linear interpolation imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn53"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">4.8 (2.9)</td><td align="left" valign="top">8.6 (3.6)</td><td align="left" valign="top">10.9 (4.3)</td><td align="left" valign="top">13.4 (4.6)</td><td align="left" valign="top">29.6 (10.8)</td><td align="left" valign="top">35.7 (11.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn54"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">9.2 (2.4)</td><td align="left" valign="top">15.8 (3.0)</td><td align="left" valign="top">23.6 (3.7)</td><td align="left" valign="top">31.2 (4.3)</td><td align="left" valign="top">41.3 (5.4)</td><td align="left" valign="top">44.8 (5.6)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>Autoregressive integrated moving average model imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn55"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">22.0 (7.0)</td><td align="left" valign="top">43.5 (15.3)</td><td align="left" valign="top">62.7 (21.2)</td><td align="left" valign="top">72.6 (24.0)</td><td align="left" valign="top">58.4 (17.7)</td><td align="left" valign="top">69.5 (20.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn56"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">50.5 (5.7)</td><td align="left" valign="top">87.9 (9.4)</td><td align="left" valign="top">118.4 (12.8)</td><td align="left" valign="top">174.5 (15.0)</td><td align="left" valign="top">169.3 (13.2)</td><td align="left" valign="top">190.1 (14.1)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>K-nearest neighbor imputation</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn57"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">I</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">4.5 (1.6)</td><td align="left" valign="top">8.5 (1.9)</td><td align="left" valign="top">9.7 (1.4)</td><td align="left" valign="top">11.3 (1.5)</td><td align="left" valign="top">23.0 (3.1)</td><td align="left" valign="top">21.2 (2.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><inline-formula><mml:math id="ieqn58"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">s</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">4.2 (0.8)</td><td align="left" valign="top">12.7 (2.3)</td><td align="left" valign="top">16.6 (1.8)</td><td align="left" valign="top">25.0 (2.4)</td><td align="left" valign="top">24.7 (2.2)</td><td align="left" valign="top">32.6 (2.7)</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>In examining our study&#x2019;s primary objective, which was to demonstrate how different methods of handling missing data affect estimation of the CIR, we 
highlight how the ongoing pandemic, as well as the preventive measures and health policy recommendations that were used to control future cases of COVID-19 in the community, could affect the effectiveness of different analytical methods. After examining 7 imputation approaches, we found that KNN and median imputation performed the best during the zero COVID-19 period, with KNN also having the lowest mean APC in terms of the CIR. ARIMA and median imputation were the most successful analytic approaches used during the transition period, whereas backfill, linear interpolation, and KNN performed the best during the new normal phase. As such, our findings show that the selection of an imputation method must take into account the specific pandemic conditions to increase the accuracy of predicted incidence rate estimates.</p></sec><sec id="s4-2"><title>Comparisons With Prior Work</title><p>Several of our findings differ from those of a study that was designed to find the best way to handle missing data for estimating a wellness index over the lifetime based on panel data from smart devices that collected various types of life logs, such as steps walked and sleep duration [<xref ref-type="bibr" rid="ref17">17</xref>]. Our findings also differ from those of a study that examined how well artificial neural networks handle missing data collected in a pediatric intensive care unit [<xref ref-type="bibr" rid="ref18">18</xref>]. The differences between our results and these previous studies were due to the performance of different imputation methods and a focus on different pandemic time periods. This underscores the importance of understanding the particular pandemic situation and developing and using health policy measures considering the potential biases and effectiveness of these analytic techniques. 
During periods of strict population-based control, such as the zero COVID-19 period, simpler methods, such as KNN and median imputation methods, which rely on recent data, could be used. In contrast, during more volatile periods of viral infections, such as the transition and new normal periods, methods that model temporal dependencies or use neighboring data points, namely the ARIMA model and KNN imputation methods, are more effective.</p><p>Our results also highlight the limitations of certain analytic methods, such as the maximum likelihood and moving average, which generally showed higher mean ACB and crude RMSE values, indicating less robustness in handling variability in the extent of missing data during different pandemic phases. These methods are, however, often used to handle missing data in medical data sets. For example, in a study involving 50 individuals selected from a 2 &#x00D7; 2 randomized controlled trial, the moving average method showed the best agreement with observed values [<xref ref-type="bibr" rid="ref19">19</xref>]. This study compared various data imputation methods for calculating body weight variability using both linear and nonlinear approaches. Moreover, maximum likelihood imputation methods have been used for handling data missing at random in a number of randomized controlled trials [<xref ref-type="bibr" rid="ref20">20</xref>]. The limitations of these methods in our study may be attributed to their underlying assumptions, which might not hold in the rapidly changing context of a pandemic, leading to increased bias and error in the calculation and interpretation of imputed data and illness incidence rates.</p><p>The effectiveness of each imputation method that we used in this study was influenced by the underlying data structure and characteristics of missingness during each pandemic period. 
For example, the backfill method, which assumes that the last observed value can be carried forward, may work during periods of low variability but can introduce significant bias during high variability periods, such as the new normal period. Similarly, moving average methods might not capture true variability in the number of cases of disease that may occur during rapid changes in transmission dynamics. Our findings differ from an observational study in 2023 that used moving average imputation for 3 public, completed time-series data sets that were collected from power equipment [<xref ref-type="bibr" rid="ref21">21</xref>]. This study aimed to create a customized methodology that combined an asymmetric denoising autoencoder and a moving average filter to impute missing data in time-series monitoring data. When choosing different imputation methods, it is crucial to consider epidemic-specific and contextual factors. Data may be missing due to overwhelmed health care systems or reporting delays in the number of cases of confirmed illness, leading to errors in data interpretation and policy recommendations to contain the spread of disease. In addition, the stage of the epidemic and extent of use and effectiveness of public health interventions can impact the suitability of different imputation techniques. Understanding these factors is essential to selecting methods that minimize bias and accurately reflect underlying trends in disease magnitude and health-related outcomes.</p><p>These findings emphasize the need for transparency and detailed reporting in the application of data imputation methods. The lack of detailed descriptions and transparency in the reporting and application of these methods in previous studies has been a significant limitation in interpreting the published literature. 
By providing a comprehensive analysis of various imputation techniques and their performance across different pandemic phases, this study contributes to a better understanding of how to more effectively handle missing data in disease surveillance. The detailed comparison of methods and the consideration of different pandemic phases provide valuable insights for future research and public health practice.</p></sec><sec id="s4-3"><title>Study Strengths and Limitations</title><p>The main strength of this study is that we used individual data to calculate the number of new cases of COVID-19 that were diagnosed and reported to public health authorities on a daily basis in each of the communities studied. Moreover, we were able to compare the original values with the imputed estimates that were collected during the 3 periods of this ongoing epidemic in a large Vietnamese province.</p><p>There are some limitations of our study, however, that need to be kept in mind in the interpretation of our principal study findings. Because we targeted an extensive range of missing values greater than 5%, we did not use any methods to ignore missing data or delete the missing values, such as listwise deletion or pairwise deletion. In addition, we used large imputed data sets and did not use methods useful for handling missing data in studies with small sample sizes, such as data augmentation [<xref ref-type="bibr" rid="ref22">22</xref>]. Furthermore, our results are primarily limited to handling missing data with missing completely at random patterns without a need to account for potential biases that may have been introduced by nonrandom missing data. Scenarios in which data were missing at random or missing not at random were not addressed in this study [<xref ref-type="bibr" rid="ref23">23</xref>]. Future investigations will be needed to analyze these types of missing-data scenarios. 
Another limitation is that our study did not account for unexpected cases, as the data were produced based on existing data and therefore may not represent some unforeseen phenomena.</p></sec><sec id="s4-4"><title>Conclusions</title><p>This study illustrates that the choice of imputation method used should be tailored to the specific epidemiological context and data collection environment. Statistical modeling and a thorough understanding of local pandemic dynamics are essential for improving the accuracy of incidence rate estimates and, in turn, public health responses to ongoing disease trends and the development and application of disease control measures. Future research should continue to refine these methods, ensuring that they can adapt to the evolving challenges of disease surveillance in public health emergencies. By improving currently available imputation methods, we can facilitate more accurate and dependable public health responses in future situations, ultimately contributing to better resource allocation, emergency response strategies, and community health outcomes.</p></sec></sec></body><back><ack><p>Assessment of data from patients who were diagnosed with COVID-19 in Bac Ninh Province in 2021 from the surveillance system of COVID-19/SARS-CoV-2 patients was provided by the National Institute of Hygiene and Epidemiology. The research reported in this paper was supported by the Fogarty International Center of the US National Institutes of Health (award D43 TW011394-01). This research did not involve animal or human participants, nor did it take place in any private or protected areas. No specific permissions were required for corresponding locations.</p></ack><notes><sec><title>Data Availability</title><p>The data sets generated and analyzed during this study are available in the open access GitHub repository [<xref ref-type="bibr" rid="ref24">24</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>HTP and TD conceived the idea for this study. 
HTP, QTP, and CKN conducted the data collection for the study. HTP and TD developed the theory and performed the data analysis. HTP and JB verified the analytical methods and supervised the findings of this research. HTP, JB, and TD wrote the manuscript with input from all authors. JB, HLN, RG, QLP, and LMG provided critical feedback and helped shape the research, analysis, and manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ACB</term><def><p>absolute crude bias</p></def></def-item><def-item><term id="abb2">APC</term><def><p>absolute percentage change</p></def></def-item><def-item><term id="abb3">ARIMA</term><def><p>autoregressive integrated moving average</p></def></def-item><def-item><term id="abb4">CCC</term><def><p>COVID-19 containment cycle</p></def></def-item><def-item><term id="abb5">CIR</term><def><p>COVID-19 incidence rate</p></def></def-item><def-item><term id="abb6">KNN</term><def><p>K-nearest neighbor</p></def></def-item><def-item><term id="abb7">MLE</term><def><p>maximum likelihood estimation</p></def></def-item><def-item><term id="abb8">RMSE</term><def><p>root mean square error</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Nsubuga</surname><given-names>P</given-names> </name><name name-style="western"><surname>White</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Thacker</surname><given-names>SB</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Jamison</surname><given-names>DT</given-names> </name><name name-style="western"><surname>Breman</surname><given-names>JG</given-names> </name><name name-style="western"><surname>Measham</surname><given-names>AR</given-names> 
</name></person-group><article-title>Public health surveillance: a tool for targeting and monitoring interventions</article-title><source>Disease Control Priorities in Developing Countries</source><year>2006</year><access-date>2024-08-06</access-date><edition>2</edition><publisher-name>The World Bank</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/books/NBK11770/">http://www.ncbi.nlm.nih.gov/books/NBK11770/</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weiss</surname><given-names>PS</given-names> </name><name name-style="western"><surname>Waller</surname><given-names>LA</given-names> </name></person-group><article-title>The impact of nonrandom missingness in surveillance data for population-level summaries: simulation study</article-title><source>JMIR Public Health Surveill</source><year>2022</year><month>09</month><day>9</day><volume>8</volume><issue>9</issue><fpage>e37887</fpage><pub-id pub-id-type="doi">10.2196/37887</pub-id><pub-id pub-id-type="medline">36083618</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ansari</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hart-Malloy</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rosenberg</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Trigg</surname><given-names>M</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>EG</given-names> </name></person-group><article-title>Modeling the potential impact of missing race and ethnicity data in infectious disease surveillance systems on disparity measures: scenario analysis of different imputation strategies</article-title><source>JMIR 
Public Health Surveill</source><year>2022</year><month>11</month><day>9</day><volume>8</volume><issue>11</issue><fpage>e38037</fpage><pub-id pub-id-type="doi">10.2196/38037</pub-id><pub-id pub-id-type="medline">36350701</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masconi</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Matsha</surname><given-names>TE</given-names> </name><name name-style="western"><surname>Echouffo-Tcheugui</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Erasmus</surname><given-names>RT</given-names> </name><name name-style="western"><surname>Kengne</surname><given-names>AP</given-names> </name></person-group><article-title>Reporting and handling of missing data in predictive research for prevalent undiagnosed type 2 diabetes mellitus: a systematic review</article-title><source>EPMA J</source><year>2015</year><volume>6</volume><issue>1</issue><fpage>7</fpage><pub-id pub-id-type="doi">10.1186/s13167-015-0028-0</pub-id><pub-id pub-id-type="medline">25829972</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sterne</surname><given-names>JAC</given-names> </name><name name-style="western"><surname>White</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Carlin</surname><given-names>JB</given-names> </name><etal/></person-group><article-title>Multiple imputation for missing data in epidemiological and clinical research: potential and pitfalls</article-title><source>BMJ</source><year>2009</year><month>06</month><day>29</day><volume>338</volume><fpage>b2393</fpage><pub-id pub-id-type="doi">10.1136/bmj.b2393</pub-id><pub-id pub-id-type="medline">19564179</pub-id></nlm-citation></ref><ref 
id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kang</surname><given-names>H</given-names> </name></person-group><article-title>The prevention and handling of the missing data</article-title><source>Korean J Anesthesiol</source><year>2013</year><month>05</month><volume>64</volume><issue>5</issue><fpage>402</fpage><lpage>406</lpage><pub-id pub-id-type="doi">10.4097/kjae.2013.64.5.402</pub-id><pub-id pub-id-type="medline">23741561</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>FS</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>AT</given-names> </name><name name-style="western"><surname>Link</surname><given-names>NB</given-names> </name><etal/></person-group><article-title>Estimating the cumulative incidence of COVID-19 in the United States using influenza surveillance, virologic testing, and mortality data: four complementary approaches</article-title><source>PLoS Comput Biol</source><year>2021</year><month>06</month><volume>17</volume><issue>6</issue><fpage>e1008994</fpage><pub-id pub-id-type="doi">10.1371/journal.pcbi.1008994</pub-id><pub-id pub-id-type="medline">34138845</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hategeka</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gr&#x00E9;pin</surname><given-names>KA</given-names> </name></person-group><article-title>Addressing missing values in routine health information system data: an evaluation of imputation methods using data from the Democratic Republic of the Congo during the COVID-19 
pandemic</article-title><source>Popul Health Metr</source><year>2021</year><month>11</month><day>4</day><volume>19</volume><issue>1</issue><fpage>44</fpage><pub-id pub-id-type="doi">10.1186/s12963-021-00274-z</pub-id><pub-id pub-id-type="medline">34736462</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Handling missing values in healthcare data: a systematic review of deep learning-based imputation techniques</article-title><source>Artif Intell Med</source><year>2023</year><month>08</month><volume>142</volume><fpage>102587</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102587</pub-id><pub-id pub-id-type="medline">37316097</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Toan</surname><given-names>DTT</given-names> </name><name name-style="western"><surname>Pham</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>KC</given-names> </name><etal/></person-group><article-title>Shift from a zero-COVID strategy to a new-normal strategy for controlling SARS-COV-2 infections in Vietnam</article-title><source>Epidemiol Infect</source><year>2023</year><month>07</month><day>4</day><volume>151</volume><fpage>e117</fpage><pub-id pub-id-type="doi">10.1017/S0950268823001048</pub-id><pub-id pub-id-type="medline">37401482</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Santos</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Pereira</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Costa</surname><given-names>AF</given-names> </name><name name-style="western"><surname>Soares</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Santos</surname><given-names>J</given-names> </name><name name-style="western"><surname>Abreu</surname><given-names>PH</given-names> </name></person-group><article-title>Generating synthetic missing data: a review by missing mechanism</article-title><source>IEEE Access</source><year>2019</year><volume>7</volume><fpage>11651</fpage><lpage>11667</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2019.2891360</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hossie</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Gobin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Murray</surname><given-names>DL</given-names> </name></person-group><article-title>Confronting missing ecological data in the age of pandemic lockdown</article-title><source>Front Ecol Evol</source><year>2021</year><month>08</month><volume>9</volume><fpage>669477</fpage><pub-id pub-id-type="doi">10.3389/fevo.2021.669477</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zeileis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Grothendieck</surname><given-names>G</given-names> </name></person-group><article-title>Zoo: S3
 infrastructure for regular and irregular time series</article-title><source>J Stat Soft</source><year>2005</year><volume>14</volume><issue>6</issue><fpage>1</fpage><lpage>27</lpage><pub-id pub-id-type="doi">10.18637/jss.v014.i06</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moritz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bartz-Beielstein</surname><given-names>T</given-names> </name></person-group><article-title>ImputeTS: time series missing value imputation in R</article-title><source>R J</source><year>2017</year><volume>9</volume><issue>1</issue><fpage>207</fpage><pub-id pub-id-type="doi">10.32614/RJ-2017-009</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hyndman</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Khandakar</surname><given-names>Y</given-names> </name></person-group><article-title>Automatic time series forecasting: the forecast package for R</article-title><source>J Stat Soft</source><year>2008</year><volume>27</volume><issue>3</issue><fpage>1</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.18637/jss.v027.i03</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kowarik</surname><given-names>A</given-names> </name><name name-style="western"><surname>Templ</surname><given-names>M</given-names> </name></person-group><article-title>Imputation with the R package VIM</article-title><source>J Stat Soft</source><year>2016</year><volume>74</volume><issue>7</issue><fpage>1</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.18637/jss.v074.i07</pub-id></nlm-citation></ref><ref 
id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>KJ</given-names> </name></person-group><article-title>Missing-data handling methods for lifelogs-based wellness index estimation: comparative analysis with panel data</article-title><source>JMIR Med Inform</source><year>2020</year><month>12</month><day>17</day><volume>8</volume><issue>12</issue><fpage>e20597</fpage><pub-id pub-id-type="doi">10.2196/20597</pub-id><pub-id pub-id-type="medline">33331831</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ghanad Poor</surname><given-names>N</given-names> </name><name name-style="western"><surname>West</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Sreepada</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Murthy</surname><given-names>S</given-names> </name><name name-style="western"><surname>G&#x00F6;rges</surname><given-names>M</given-names> </name></person-group><article-title>An artificial neural network&#x2013;based pediatric mortality risk score: development and performance evaluation using data from a large North American registry</article-title><source>JMIR Med Inform</source><year>2021</year><month>08</month><day>31</day><volume>9</volume><issue>8</issue><fpage>e24079</fpage><pub-id pub-id-type="doi">10.2196/24079</pub-id><pub-id pub-id-type="medline">34463636</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Turicchi</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>O&#x2019;Driscoll</surname><given-names>R</given-names> </name><name name-style="western"><surname>Finlayson</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Data imputation and body weight variability calculation using linear and nonlinear methods in data collected from digital smart scales: simulation and validation study</article-title><source>JMIR Mhealth Uhealth</source><year>2020</year><month>09</month><day>11</day><volume>8</volume><issue>9</issue><fpage>e17977</fpage><pub-id pub-id-type="doi">10.2196/17977</pub-id><pub-id pub-id-type="medline">32915155</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goldberg</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Bolt</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Davidson</surname><given-names>RJ</given-names> </name></person-group><article-title>Data missing not at random in mobile health research: assessment of the problem and a case for sensitivity analyses</article-title><source>J Med Internet Res</source><year>2021</year><month>06</month><day>15</day><volume>23</volume><issue>6</issue><fpage>e26749</fpage><pub-id pub-id-type="doi">10.2196/26749</pub-id><pub-id pub-id-type="medline">34128810</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Hua</surname><given-names>L</given-names> </name><name name-style="western"><surname>Cai</surname><given-names>Y</given-names> 
</name></person-group><article-title>Multi-type missing imputation of time-series power equipment monitoring data based on moving average filter-asymmetric denoising autoencoder</article-title><source>Sensors (Basel)</source><year>2023</year><month>12</month><day>8</day><volume>23</volume><issue>24</issue><fpage>9697</fpage><pub-id pub-id-type="doi">10.3390/s23249697</pub-id><pub-id pub-id-type="medline">38139543</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schafer</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Graham</surname><given-names>JW</given-names> </name></person-group><article-title>Missing data: our view of the state of the art</article-title><source>Psychol Methods</source><year>2002</year><month>06</month><volume>7</volume><issue>2</issue><fpage>147</fpage><lpage>177</lpage><pub-id pub-id-type="medline">12090408</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Jamshidian</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mata</surname><given-names>M</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Lee</surname><given-names>SY</given-names> </name></person-group><article-title>Chapter 2: advances in analysis of mean and covariance structure when data are incomplete</article-title><source>Handbook of Latent Variable and Related Models</source><year>2007</year><publisher-name>North Holland</publisher-name><fpage>21</fpage><lpage>44</lpage><pub-id pub-id-type="doi">10.1016/S1871-0301(06)01002-X</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation 
citation-type="web"><article-title>Thanhph58/Handing-missing-data</article-title><source>GitHub</source><access-date>2024-08-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/Thanhph58/Handing-missing-data">https://github.com/Thanhph58/Handing-missing-data</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Study population characteristics according to the study period.</p><media xlink:href="publichealth_v10i1e53719_app1.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Example of data sets and characteristics of study variables.</p><media xlink:href="publichealth_v10i1e53719_app2.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Study formulas.</p><media xlink:href="publichealth_v10i1e53719_app3.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material></app-group></back></article>