<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id><journal-id journal-id-type="publisher-id">publichealth</journal-id><journal-id journal-id-type="index">9</journal-id><journal-title>JMIR Public Health and Surveillance</journal-title><abbrev-journal-title>JMIR Public Health Surveill</abbrev-journal-title><issn pub-type="epub">2369-2960</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e73916</article-id><article-id pub-id-type="doi">10.2196/73916</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Comparing Multiple Imputation Methods to Address Missing Patient Demographics in Immunization Information Systems: Retrospective Cohort Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Brown</surname><given-names>Sara</given-names></name><degrees>CHES, MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kudia</surname><given-names>Ousswa</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kleine</surname><given-names>Kaye</given-names></name><degrees>MPH, MBA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Kidd</surname><given-names>Bryndan</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wines</surname><given-names>Robert</given-names></name><degrees>CPM, CHP, BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Meckes</surname><given-names>Nathanael</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Scientific Services - Analytics, Scientific Technologies Corporation (United States)</institution><addr-line>411 S 1st St</addr-line><addr-line>Phoenix</addr-line><addr-line>AZ</addr-line><country>United States</country></aff><aff id="aff2"><institution>Immunization Services, West Virginia Department of Health and Human Services</institution><addr-line>Charleston</addr-line><addr-line>WV</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Sanchez</surname><given-names>Travis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chan</surname><given-names>Kin Wai</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Ou</surname><given-names>Lihong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Sara Brown, CHES, MPH, Scientific Services - Analytics, Scientific Technologies Corporation (United States), 411 S 1st St, Phoenix, AZ, 85004, United States, 1 480-745-8500; <email>sara_brown@stchome.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>26</day><month>8</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e73916</elocation-id><history><date date-type="received"><day>13</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>01</day><month>07</month><year>2025</year></date><date date-type="accepted"><day>04</day><month>07</month><year>2025</year></date></history><copyright-statement>&#x00A9; Sara Brown, Ousswa Kudia, Kaye Kleine, Bryndan Kidd, Robert Wines, Nathanael Meckes. Originally published in JMIR Public Health and Surveillance (<ext-link ext-link-type="uri" xlink:href="https://publichealth.jmir.org">https://publichealth.jmir.org</ext-link>), 26.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://publichealth.jmir.org">https://publichealth.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://publichealth.jmir.org/2025/1/e73916"/><abstract><sec><title>Background</title><p>Immunization Information Systems (IIS) and surveillance data are essential for public health interventions and programming; however, missing data are often a challenge, potentially introducing bias and impacting the accuracy of vaccine coverage assessments, particularly in addressing disparities.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the performance of 3 multiple imputation methods, Stata&#x2019;s (StataCorp LLC) multiple imputation using chained equations (MICE), scikit-learn&#x2019;s Iterative-Imputer, and Python&#x2019;s miceforest package, in managing missing race and ethnicity data in large-scale surveillance datasets. We compared these methodologies in their ability to preserve demographic distribution, computational efficiency, and performed G-tests on contingency tables to obtain likelihood ratio statistics to assess the association between race and ethnicity and flu vaccination status.</p></sec><sec sec-type="methods"><title>Methods</title><p>In this retrospective cohort study, we analyzed 2021&#x2010;2022 flu vaccination and demographic data from the West Virginia Immunization Information System (N=2,302,036), where race (15%) and ethnicity (34%) were missing. MICE, Iterative Imputer, and miceforest were used to impute missing variables, generating 15 datasets each. 
Computational efficiency, demographic distribution preservation, and spatial clustering patterns were assessed using G-statistics.</p></sec><sec sec-type="results"><title>Results</title><p>After imputation, an additional 780,339 observations were obtained compared with complete case analysis. All imputation methods exhibited significant spatial clustering for race imputation (G-statistics: MICE=26,452.7, Iterative-Imputer=128,280.3, Miceforest=26,891.5; <italic>P</italic>&#x003C;.001), while ethnicity imputation showed variable clustering patterns (G-statistics: MICE=1142.2, Iterative-Imputer=1.7, Miceforest=2185.0; <italic>P</italic>: MICE&#x003C;.001, Iterative-Imputer=.19, Miceforest&#x003C;.001). MICE and Miceforest best preserved the proportional distribution of demographics. Computational efficiency varied, with MICE requiring 14 hours, Iterative-Imputer 2 minutes, and Miceforest 10 minutes for 15 imputations. Postimputation estimates indicated a 0.87%&#x2010;18% reduction in stratified flu vaccination coverage rates. Overall estimated flu vaccination rates decreased from 26% to 19% after imputations.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Both MICE and Miceforest offer flexible and reliable approaches for imputing missing demographic data while mitigating bias compared with Iterative-Imputer. Our results also highlight that the imputation method can profoundly affect research findings. Though MICE and Miceforest had better effect sizes and reliability, MICE was much more computationally expensive and time-consuming, limiting its use in large surveillance datasets. Miceforest can use cloud-based computing, which further enhances efficiency by offloading resource-intensive tasks, enabling parallel execution, and minimizing processing delays. The significant decrease in vaccination coverage estimates illustrates how incomplete or missing data can obscure real disparities. 
Our findings support regular application of imputation methods in immunization surveillance to improve health equity evaluations and shape targeted public health interventions and programming.</p></sec></abstract><kwd-group><kwd>multiple imputation</kwd><kwd>missing data</kwd><kwd>imputation methods</kwd><kwd>data science</kwd><kwd>machine learning</kwd><kwd>statistical modeling</kwd><kwd>immunization information system</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The usage of large datasets obtained from surveillance data and Immunization Information Systems (IIS) has held a vital role in recognizing and comprehending the extent of health disparities and inequities within a population [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Previous research has shown that there is an association between race and ethnicity and vaccine acceptance and uptake [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. However, as exemplified by the COVID-19 pandemic, race and ethnicity fields in public health surveillance systems and health records are historically underpopulated, thereby limiting a full understanding of the extent of vaccine inequities [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Missing data can affect the capability to effectively describe vaccine coverage and may introduce bias into epidemiologic analyses, particularly when attempting to estimate and address racial and ethnic health inequities, further compromising public health decision-making and resource allocation.</p><p>Studies exploring racial and ethnic inequalities traditionally omit individuals with missing demographic information or classify them as &#x201C;unknown&#x201D; [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. 
However, this can miscalculate vaccine coverage rates when stratified by race and ethnicity, leading to biased analyses between different racial and ethnic groups. This selection bias dampens the capacity to accurately measure the true vaccine uptake among underserved populations that are more likely to have racial and ethnic data missing in surveillance datasets [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Multiple imputation for missing data, alternatively, has been shown to be a better method for eliminating missing data and preserving the greatest amount of data [<xref ref-type="bibr" rid="ref13">13</xref>]. The theoretical foundation and technique for multiple imputation was established by D B Rubin, who developed the structure for managing missing data through the formation of multiple probable datasets [<xref ref-type="bibr" rid="ref15">15</xref>]. This central process undertakes the innate ambiguity of missing values by generating multiple, complete, and reasonable datasets [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>Following multiple imputation, classic likelihood ratio tests cannot be implemented as is because the final estimates do not come directly from a single model, and as a result, require modification to account for the uncertainty introduced by the imputation process. 
The multiple imputation likelihood ratio test, developed by Li et al [<xref ref-type="bibr" rid="ref19">19</xref>] and Meng et al [<xref ref-type="bibr" rid="ref20">20</xref>], or stacked multiple imputation, introduced by Chan et al [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], provides frameworks for hypothesis testing that properly incorporate both within-imputation and between-imputation variability.</p><p>Multiple imputations using chained equations (MICE) builds upon Rubin&#x2019;s initial work to handle multifaceted and specific datasets; it is a statistically sound method for managing missing data and has been used in the context of medical research and large, national datasets [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. MICE uses the distribution of the original data to estimate values that signify the ambiguity of the true missing value. This can yield unbiased approximations after an adequate number of imputations, which is contingent on the quality of the dataset.</p><p>Machine learning (ML) is increasingly being used to reconcile missing data by offering robust and sophisticated solutions for improving data quality. Different ML techniques use various mathematical processes based on decision trees to predict a specified outcome, all with varying levels of accuracy [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. Similar to traditional MICE, the accuracy and performance of machine learning algorithms to impute missing data rely heavily on the type of missing data being evaluated.</p><p>When compared with various ML techniques, MICE had less bias and similar standard errors for parameter estimates, with additional studies demonstrating that combining MICE with ML yields less biased results [<xref ref-type="bibr" rid="ref26">26</xref>]. Miceforest is a combination of classical epidemiological techniques and ML algorithms. 
This algorithm uses MICE with light gradient boosting, a tree-based algorithm, to provide a flexible and powerful solution that remains efficient and reliable for managing missing data [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>Recent public health pandemics and outbreaks have emphasized the importance of reliable approaches to missing data in surveillance systems. Studies have demonstrated that imputation methods significantly affect results, with incidence estimates and measures of disparity varying by method, and multiple imputation demonstrating more consistency and efficacy than single or complete case approaches [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>Recent attempts have explored Bayesian Surname and Geocoding and Bayesian Improved Surname and Geocoding (BISG) methods to reconcile missing race and ethnicity data in large public health datasets [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. While this approach has demonstrated practicality in particular circumstances, its suitability for state-level immunization registries is imperfect. Specifically, name-based imputations can present systematic bias in racially and ethnically diverse populations [<xref ref-type="bibr" rid="ref37">37</xref>]. Statewide immunization registries contain various naming conventions and regional uniqueness; reliance on a name-based imputation can bolster misclassification bias and lead to misguided epidemiological analyses. As a result, approaches such as multiple imputation using observed covariates may be more suitable for IISs [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>IISs present unique missing data challenges because they are a birth-to-death registry, making them temporally complex. 
Missing data patterns may vary by birth cohort due to evolving reporting requirements, provider participation rates, state-specific reporting requirements, use of multiple providers, and technology advancements.</p><p>This research addresses a critical gap identified in the surveillance literature, where, despite the widespread recognition of missing data challenges, few studies have systematically evaluated imputation approaches specifically designed for immunization data. To date, there is little published peer-reviewed literature on methods to address missing data in IIS. A 2024 cross-sectional study improved race and ethnicity data in the New York State (NYS) laboratory reporting system by integrating IIS data, while Russ et al [<xref ref-type="bibr" rid="ref41">41</xref>] enhanced IIS completeness by incorporating external records and applying logistic multiple imputation and random forest techniques [<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>This study builds upon established multiple imputation methodologies while addressing the complexities of IIS data structures by adapting recent advancements in handling missing data in electronic health records (EHRs) to the specific challenges of population-based immunization surveillance. In this study, we investigated racial and ethnic disparities in flu vaccine uptake in the West Virginia IIS for the 2021&#x2010;2022 flu season. We sought to address potential bias due to missing race and ethnicity data through multiple imputation and to test the efficiency and accuracy of 3 established methods to address missing data in IIS and the implications of imputations on flu vaccination coverage data.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>In this retrospective cohort study, patient data for the 2021&#x2010;2022 flu vaccination season (June 1, 2021, to June 30, 2022) were obtained from the West Virginia Statewide Immunization Information System (WVSIIS). 
Geo-demographic data, such as urbanicity and Social Vulnerability Index (SVI) status, were calculated using county address data obtained from the IIS. For the purposes of our analysis, urbanicity was defined as a county being either &#x201C;metropolitan&#x201D; or &#x201C;rural.&#x201D; In accordance with the Federal Office of Rural Health Policy (FORHP), counties that had a population of 50,000 or more were considered &#x201C;metropolitan&#x201D; and counties with a population of fewer than 50,000 were considered &#x201C;rural.&#x201D; County population was obtained from the West Virginia 2020 US Census [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. SVI is calculated by the Centers for Disease Control and Prevention (CDC) and the Agency for Toxic Substances and Disease Registry (ATSDR) at the census tract and county levels and represents the potential negative effects that external stresses, such as housing, socioeconomic, and minority status, have on a community [<xref ref-type="bibr" rid="ref45">45</xref>].</p><p>After a preliminary data cleaning process on a clone of the WVSIIS database, in which patients who were marked as deceased or inactive or who retained an out-of-state address were removed, 2,302,036 records remained, with a 2021&#x2010;2022 flu vaccine coverage rate of 26%. Of these records, 15% (n=347,633) were missing patient race, and 34% (n=780,339) were missing patient ethnicity. We addressed missing race and ethnicity data by using 3 different multiple imputation approaches to assess the stability and reliability of our findings.</p><p>We first implemented Stata 17&#x2019;s (StataCorp LLC) MICE using the mi impute chained command. Stata allows us to identify individual imputed datasets and pool results, excluding the original dataset with missing values.</p><p>We then used the Iterative-Imputer algorithm from Python&#x2019;s scikit-learn library. 
Iterative Imputer operates similarly to MICE by using available data in other features to estimate missing values. The imputation is performed in an iterative, round-robin manner, with a regressor to predict the missing values [<xref ref-type="bibr" rid="ref46">46</xref>].</p><p>We applied the Python Miceforest package, which is designed for performing MICE using random forests. Rather than assume a linear relationship between variables, miceforest can capture complex, non-linear patterns and imputes values iteratively, unlike traditional MICE [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>With each imputation method, we created 15 imputed datasets to stabilize our variance estimates [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. We jointly imputed patient race and ethnicity using age, sex, urbanicity, SVI, county, and flu vaccination status. Selection of these covariates was based on the work done by Zhang et al [<xref ref-type="bibr" rid="ref40">40</xref>], and the considerable missingness present in other potentially informative variables. Once missing data had been reconciled, postimputation estimates were applied to WVSIIS flu vaccination data for the 2021&#x2010;2022 flu season.</p><p>We performed G-tests on contingency tables to obtain likelihood ratio statistics to assess the association between race and ethnicity and flu vaccination status. For MICE, we pooled across individual imputed datasets, and G-test statistics were averaged across imputations. In addition, we calculated between-imputation variance to evaluate the stability of results. Pooling was used in lieu of stacking to accommodate the computational requirements of the statewide immunization data&#x2019;s size. 
No pooling was required for Iterative-Imputer and Miceforest because they produce a single imputation result.</p><p>To assess the proportion estimate, we calculated vaccination proportions with 95% CIs using the Wilson score interval method. This method offers better coverage properties than the standard normal approximation.</p><p>Sensitivity analyses examined the statistical significance and consistency of vaccination disparities, compared G-statistics to determine variances in effect sizes, and assessed whether CIs for vaccination proportions overlapped.</p><p>The primary engine for Iterative-Imputer, Miceforest, and all postimputation analyses was executed in Python 3.11 using pandas, scipy.stats, and matplotlib. The evaluation framework was intended to manage the multilevel configuration of Stata&#x2019;s MICE while preserving uniformity with Iterative-Imputer and Miceforest. All Python computations were performed on a cloud-based computing cluster with 16-core processors and 128 GB of RAM.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study consisted of secondary analysis of deidentified data from WVSIIS. The original data collection was conducted as part of state-mandated public health surveillance under W Va Code R &#x00A7; 64-7-6.6, which requires immunization data collection for all individuals residing in West Virginia. As such, it was not subject to institutional review board (IRB) review [<xref ref-type="bibr" rid="ref49">49</xref>]. The secondary analysis did not involve direct interaction with human participants and was determined not to constitute human participants research, qualifying for exemption from IRB review and not requiring informed consent. This study met the jurisdiction requirement for IRB exemption, given the nature of the data used. The data used were compiled for public health surveillance and did not contain personal identifiers. Permission to access and analyze the data was granted by the West Virginia Division of Immunization Services. 
All analyses met the terms of related federal, state, and international data privacy regulations. No compensation was provided or applicable, and no images or materials containing identifiable individual information are included in the manuscript or supplementary files.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>MICE</title><p>347,633 race categories that were previously missing were imputed after completing MICE. There were 16.5% additional White records, 16.8% additional Black records, 16.3% additional Asian records, 18.5% additional Indigenous records, 16.8% additional Native Hawaiian or Pacific Islander records, 17.4% additional Multiracial records, and 15.3% additional other records after imputations (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>780,339 ethnicity categories that were previously missing were imputed after completing MICE. There were 52.4% additional Hispanic or Latino records and 40.5% additional Not Hispanic or Latino records after imputations (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>After MICE, individual demographics remained proportional to the original dataset distributions (<xref ref-type="fig" rid="figure1">Figures 1</xref> and <xref ref-type="fig" rid="figure2">2</xref> and <xref ref-type="table" rid="table2">Table 2</xref>). A chi-square test illustrated that there was a statistically significant difference between the complete case and MICE estimates (<italic>P</italic>&#x003C;.001; <xref ref-type="table" rid="table3">Table 3</xref>). Overall computational time was approximately 14 hours.</p><p>For the 2021&#x2010;2022 flu season, the complete case analysis flu vaccine coverage rate was 26%. After MICE, West Virginia had an overall flu vaccine coverage rate of 19%. 
Compared with a complete case analysis, flu vaccine coverage rates decreased when stratified by race and ethnicity, with the largest decreases observed in non-Hispanic (NH) White (5%), NH Black (4%), Hispanic or Latino (6%), Asian (7%), Native Hawaiian or Pacific Islander (6%), and Other (19%; <xref ref-type="table" rid="table4">Table 4</xref> and <xref ref-type="fig" rid="figure3">Figure 3</xref>). After reconciling missing race and ethnicity, an additional 63,984 individuals who were previously excluded from stratified analyses were included in the analysis.</p><p>Likelihood ratio tests were used to determine the impact of race and ethnicity on vaccine uptake. Using MICE, race was significantly associated with flu vaccination status (G-statistic range=26,365.427&#x2010;26,612.980; pooled G-statistic=26,452.66; <italic>P</italic>&#x003C;.001). Although the between-imputation variance was considerable in absolute terms (4103.38; <xref ref-type="table" rid="table5">Table 5</xref>), it was small relative to the pooled statistic, denoting good stability with a coefficient of variation of 0.24%. For ethnicity, the MICE model also denoted a significant impact (G-statistic range=1055.287&#x2010;1205.254; pooled G-statistic=1142.23; <italic>P</italic>&#x003C;.001), with substantial between-imputation variance (1277.885) and a moderate coefficient of variation of 3.13% (<xref ref-type="table" rid="table5">Table 5</xref>). 
Both of MICE&#x2019;s G-scores demonstrate a large effect.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Assessment of West Virginia Statewide Immunization Information System race and ethnicity demographics using complete case analysis and 3 imputation methods&#x2014;multiple imputation using chained equations, Iterative-Imputer, and Miceforest.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Missing, %</td><td align="left" valign="bottom" colspan="2">Complete case</td><td align="left" valign="bottom" colspan="2">MICE<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom" colspan="2">Iterative-Imputer</td><td align="left" valign="bottom" colspan="2">Miceforest</td></tr></thead><tbody><tr><td align="left" valign="top">Race</td><td align="left" valign="top">15%</td><td align="left" valign="top" colspan="2"/><td align="left" valign="top" colspan="2"/><td align="left" valign="top" colspan="2"/><td align="left" valign="top" colspan="2"/></tr><tr><td align="left" valign="top">&#x2003;White, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">1,519,326 (77.7)</td><td align="left" valign="top" colspan="2">1,793,167 (77.8)</td><td align="left" valign="top" colspan="2">1,527,855 (66.4)</td><td align="left" valign="top" colspan="2">1,792,102 (77.8)</td></tr><tr><td align="left" valign="top">&#x2003;Black, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">65,716 (3.36)</td><td align="left" valign="top" colspan="2">77,767 (3.38)</td><td align="left" valign="top" colspan="2">391,756 (17.0)</td><td align="left" valign="top" colspan="2">78,624 (3.42)</td></tr><tr><td align="left" valign="top">&#x2003;Asian, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">14,334 (0.73)</td><td align="left" valign="top" 
colspan="2">16,886 (0.73)</td><td align="left" valign="top" colspan="2">24,003 (1.04)</td><td align="left" valign="top" colspan="2">16,813 (0.73)</td></tr><tr><td align="left" valign="top">&#x2003;Indigenous, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">7110 (0.36)</td><td align="left" valign="top" colspan="2">8561 (0.37)</td><td align="left" valign="top" colspan="2">7110 (0.31)</td><td align="left" valign="top" colspan="2">9399 (0.41)</td></tr><tr><td align="left" valign="top">&#x2003;Native Hawaiian or Pacific Islander, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">1720 (0.09)</td><td align="left" valign="top" colspan="2">2036 (0.09)</td><td align="left" valign="top" colspan="2">4846 (0.21)</td><td align="left" valign="top" colspan="2">2235 (0.10)</td></tr><tr><td align="left" valign="top">&#x2003;Multiracial, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">2405 (0.12)</td><td align="left" valign="top" colspan="2">2863 (0.12)</td><td align="left" valign="top" colspan="2">2674 (0.12)</td><td align="left" valign="top" colspan="2">2755 (0.12)</td></tr><tr><td align="left" valign="top">&#x2003;Other, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">343,792 (17.6)</td><td align="left" valign="top" colspan="2">400,757 (17.4)</td><td align="left" valign="top" colspan="2">343,792 (14.9)</td><td align="left" valign="top" colspan="2">400,108 (17.4)</td></tr><tr><td align="left" valign="top">Total, N</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">1,954,403</td><td align="left" valign="top" colspan="2">2,302,036</td><td align="left" valign="top" colspan="2">2,302,036</td><td align="left" valign="top" colspan="2">2,302,036</td></tr><tr><td align="left" valign="top">Ethnicity</td><td align="left" valign="top">34%</td><td align="left" valign="top" colspan="2"/><td align="left" valign="top" colspan="2"/><td 
align="left" valign="top" colspan="2"/><td align="left" valign="top" colspan="2"/></tr><tr><td align="left" valign="top">&#x2003;Hispanic or Latino, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">33,652 (2.21)</td><td align="left" valign="top" colspan="2">57,552 (2.50)</td><td align="left" valign="top" colspan="2">33,652 (1.46)</td><td align="left" valign="top" colspan="2">75,031 (3.26)</td></tr><tr><td align="left" valign="top">&#x2003;Not Hispanic or Latino, n (%)</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">1,488,045 (97.8)</td><td align="left" valign="top" colspan="2">2,244,484 (97.5)</td><td align="left" valign="top" colspan="2">2,268,384 (98.5)</td><td align="left" valign="top" colspan="2">2,227,005 (96.7)</td></tr><tr><td align="left" valign="top">Total, N</td><td align="left" valign="top"/><td align="left" valign="top" colspan="2">1,521,697</td><td align="left" valign="top" colspan="2">2,302,036</td><td align="left" valign="top" colspan="2">2,302,036</td><td align="left" valign="top" colspan="2">2,302,036</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>MICE: multiple imputation using chained equations.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flu vaccination coverage proportions with 95% CIs by race across multiple imputation using chained equations, Iterative-Imputer, and Miceforest of the West Virginia Statewide Immunization Information System flu data, 2021&#x2010;2022 (N=2,302,036).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="publichealth_v11i1e73916_fig01.png"/></fig><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Flu vaccination coverage proportions with 95% CIs by ethnicity across multiple imputation using chained equations, Iterative-Imputer, and Miceforest of the West Virginia Statewide 
Immunization Information System flu data, 2021&#x2010;2022 (N=2,302,036).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="publichealth_v11i1e73916_fig02.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>2021&#x2010;2022 WVSIIS<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> flu vaccination proportions with 95% CIs by race and ethnicity after multiple imputation using chained equations, Iterative-Imputer, and Miceforest.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variables</td><td align="left" valign="bottom">MICE<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> (95% CI)</td><td align="left" valign="bottom">Iterative-Imputer (95% CI)</td><td align="left" valign="bottom">Miceforest (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">Race</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;White</td><td align="left" valign="top">&#x2003;0.177 (0.176&#x2010;0.178)</td><td align="left" valign="top">&#x2003;0.205 (0.204&#x2010;0.205)</td><td align="left" valign="top">&#x2003;0.177 (0.176&#x2010;0.178)</td></tr><tr><td align="left" valign="top">&#x2003;Black</td><td align="left" valign="top">&#x2003;0.117 (0.115&#x2010;0.120)</td><td align="left" valign="top">&#x2003;0.032 (0.032&#x2010;0.033)</td><td align="left" valign="top">&#x2003;0.116 (0.114&#x2010;0.119)</td></tr><tr><td align="left" valign="top">&#x2003;Asian</td><td align="left" valign="top">&#x2003;0.155 (0.150&#x2010;0.160)</td><td align="left" valign="top">&#x2003;0.219 (0.214&#x2010;0.224)</td><td align="left" valign="top">&#x2003;0.156 (0.151&#x2010;0.162)</td></tr><tr><td align="left" valign="top">&#x2003;Indigenous</td><td align="left" valign="top">&#x2003;0.078 (0.073&#x2010;0.084)</td><td align="left" 
valign="top">&#x2003;0.093 (0.086&#x2010;0.100)</td><td align="left" valign="top">&#x2003;0.072 (0.067&#x2010;0.077)</td></tr><tr><td align="left" valign="top">&#x2003;Native Hawaiian or Pacific Islander</td><td align="left" valign="top">&#x2003;0.143 (0.128&#x2010;0.159)</td><td align="left" valign="top">&#x2003;0.073 (0.066&#x2010;0.080)</td><td align="left" valign="top">&#x2003;0.135 (0.122&#x2010;0.150)</td></tr><tr><td align="left" valign="top">&#x2003;Multiracial</td><td align="left" valign="top">&#x2003;0.009 (0.006&#x2010;0.013)</td><td align="left" valign="top">&#x2003;0.030 (0.024&#x2010;0.037)</td><td align="left" valign="top">&#x2003;0.009 (0.006&#x2010;0.014)</td></tr><tr><td align="left" valign="top">&#x2003;Other</td><td align="left" valign="top">&#x2003;0.281 (0.280&#x2010;0.282)</td><td align="left" valign="top">&#x2003;0.323 (0.322&#x2010;0.325)</td><td align="left" valign="top">&#x2003;0.282 (0.280&#x2010;0.283)</td></tr><tr><td align="left" valign="top">Ethnicity</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top">&#x2003;Hispanic or Latino</td><td align="left" valign="top">&#x2003;0.140 (0.137&#x2010;0.142)</td><td align="left" valign="top">&#x2003;0.195 (0.191&#x2010;0.199)</td><td align="left" valign="top">&#x2003;0.129 (0.127&#x2010;0.132)</td></tr><tr><td align="left" valign="top">&#x2003;Not Hispanic or Latino</td><td align="left" valign="top">&#x2003;0.194 (0.193&#x2010;0.194)</td><td align="left" valign="top">&#x2003;0.192 (0.192&#x2010;0.193)</td><td align="left" valign="top">&#x2003;0.194 (0.194&#x2010;0.195)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>WVSIIS: West Virginia Statewide Immunization Information System.</p></fn><fn id="table2fn2"><p><sup>b</sup>MICE: multiple imputation using chained equations.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" 
position="float"><label>Table 3.</label><caption><p>Chi-square tests evaluating race and ethnicity distributions in the West Virginia Statewide Immunization Information System among the complete case dataset and the multiple imputation using chained equations-imputed, Iterative-Imputer-imputed, and Miceforest-imputed datasets.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Imputation method</td><td align="left" valign="bottom">Pearson chi-square</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">MICE<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">1.1e+04</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Iterative-Imputer</td><td align="left" valign="top">1.8e+05</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Miceforest</td><td align="left" valign="top">1.3e+04</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MICE: multiple imputation using chained equations.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Flu vaccination coverage proportions by race and ethnicity across multiple imputation using chained equations, Iterative-Imputer, and Miceforest of the West Virginia Statewide Immunization Information System flu data compared against the complete case analysis, 2021&#x2010;2022 (N=2,302,036). 
NH Black: non-Hispanic Black; NH White: non-Hispanic White.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="publichealth_v11i1e73916_fig03.png"/></fig><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>2021&#x2010;2022 West Virginia Statewide Immunization Information System flu vaccination counts for complete case, multiple imputation using chained equations, Iterative-Imputer, and Miceforest by race and ethnicity.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Race and ethnicity</td><td align="left" valign="bottom">Complete case (N=378,574), n (%)</td><td align="left" valign="bottom">MICE<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> (N=442,679), n (%)</td><td align="left" valign="bottom">Iterative-Imputer (N=442,679), n (%)</td><td align="left" valign="bottom">Miceforest (N=442,679), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">NH<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> White</td><td align="left" valign="top">274,597 (72.5)</td><td align="left" valign="top">313,530 (71.1)</td><td align="left" valign="top">309,150 (69.8)</td><td align="left" valign="top">312,686 (70.6)</td></tr><tr><td align="left" valign="top">NH<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> Black</td><td align="left" valign="top">7403 (1.96)</td><td align="left" valign="top">8931 (2.02)</td><td align="left" valign="top">12,510 (2.83)</td><td align="left" valign="top">8903 (2.01)</td></tr><tr><td align="left" valign="top">Hispanic or Latino</td><td align="left" valign="top">6565 (1.73)</td><td align="left" valign="top">8031 (1.81)</td><td align="left" valign="top">6565 (1.48)</td><td align="left" valign="top">9692 (2.12)</td></tr><tr><td align="left" valign="top">Asian</td><td align="left" valign="top">1967 (0.52)</td><td align="left" valign="top">2528 (0.57)</td><td align="left" 
valign="top">5190 (1.17)</td><td align="left" valign="top">2502 (0.57)</td></tr><tr><td align="left" valign="top">Indigenous</td><td align="left" valign="top">495 (0.13)</td><td align="left" valign="top">612 (0.14)</td><td align="left" valign="top">607 (0.14)</td><td align="left" valign="top">605 (0.14)</td></tr><tr><td align="left" valign="top">Native Hawaiian or Pacific Islander</td><td align="left" valign="top">204 (0.05)</td><td align="left" valign="top">246 (0.06)</td><td align="left" valign="top">248 (0.06)</td><td align="left" valign="top">252 (0.06)</td></tr><tr><td align="left" valign="top">Multiracial</td><td align="left" valign="top">16 (0.01)</td><td align="left" valign="top">24 (0.01)</td><td align="left" valign="top">24 (0.01)</td><td align="left" valign="top">22 (0.01)</td></tr><tr><td align="left" valign="top">Other</td><td align="left" valign="top">87,448 (23.1)</td><td align="left" valign="top">108,777 (24.5)</td><td align="left" valign="top">108,385 (24.5)</td><td align="left" valign="top">108,017 (24.4)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>MICE: multiple imputation using chained equations.</p></fn><fn id="table4fn2"><p><sup>b</sup>NH: non-Hispanic.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Likelihood ratio test results comparing models with and without race and ethnicity as predictors of flu vaccination. Race and ethnicity were imputed in the West Virginia Statewide Immunization Information System using data collected between 2021 and 2022. 
Models were fit to datasets imputed using Stata&#x2019;s multiple imputation using chained equations (based on Rubin&#x2019;s rules), Iterative-Imputer (Python scikit-learn), and Miceforest (Python).</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable and method</td><td align="left" valign="bottom">G-Statistic</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">Degrees of freedom</td><td align="left" valign="bottom">Between-Imputation Variance<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Race</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stata MICE<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="left" valign="top">26,452.659</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td><td align="left" valign="top">4103.382</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Iterative-Imputer</td><td align="left" valign="top">128,280.274</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">6</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Miceforest</td><td align="left" valign="top">26,891.527</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">6</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Ethnicity</td><td align="left" valign="top"/><td align="left" valign="top"/><td 
align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stata MICE</td><td align="left" valign="top">1142.231</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">1277.885</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Iterative-Imputer</td><td align="left" valign="top">1.683</td><td align="left" valign="top">.19</td><td align="left" valign="top">1</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Miceforest</td><td align="left" valign="top">2185.012</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1</td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>Between-imputation variance is only applicable to Stata&#x2019;s MICE model.</p></fn><fn id="table5fn2"><p><sup>b</sup>MICE: multiple imputation using chained equations.</p></fn><fn id="table5fn3"><p><sup>c</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Iterative-Imputer</title><p>347,633 race categories that were previously missing were successfully imputed after completing the Iterative-Imputer. There were 0.26% additional White, 142.5% additional Black, 50.4% additional Asian, 0% additional Indigenous, 95.2% additional Native Hawaiian or Pacific Islander, 10.6% additional Multiracial, and 0% additional Other records after imputations (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>Like MICE, 780,339 ethnicity categories that were previously missing were imputed after completing Iterative-Imputer. 
There were 0% additional Hispanic or Latino and 4.5% additional Not Hispanic or Latino records after imputations (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>After the Iterative-Imputer, individual demographics did not remain proportional to the original dataset distributions (<xref ref-type="fig" rid="figure1">Figures 1</xref> and <xref ref-type="fig" rid="figure2">2</xref> and <xref ref-type="table" rid="table2">Table 2</xref>). A chi-square test illustrated that there was a statistically significant difference between the complete case and the Iterative-Imputer estimation (<italic>P</italic>&#x003C;.001; <xref ref-type="table" rid="table3">Table 3</xref>). Overall computational time was 2 minutes.</p><p>For the 2021&#x2010;2022 flu season, the complete case analysis flu vaccine coverage rate was 26%. Using Iterative-Imputer, after imputations, West Virginia had an overall flu vaccine coverage rate of 19%. Flu vaccine coverage rates decreased when stratified by race and ethnicity when compared with a complete case analysis, with the most significant decreases observed in NH White (3%), NH Black (13%), Native Hawaiian or Pacific Islander (4%), and Other (15%; <xref ref-type="fig" rid="figure3">Figure 3</xref> and <xref ref-type="table" rid="table4">Table 4</xref>). After reconciling missing race and ethnicity, an additional 63,984 individuals were included in the analysis that were previously excluded from stratified analyses.</p><p>Likelihood ratio tests were used to determine the impact of race and ethnicity on vaccine uptake. Applying Iterative-Imputer, race was significantly associated with flu vaccination status (G-statistic=128,280.27, <italic>P</italic>&#x003C;.001), demonstrating a massive effect (<xref ref-type="table" rid="table5">Table 5</xref>). 
For ethnicity, the Iterative-Imputer model produced a nonsignificant result (G-statistic=1.68, <italic>P</italic>=.19; <xref ref-type="table" rid="table5">Table 5</xref>).</p></sec><sec id="s3-3"><title>Miceforest</title><p>347,633 race categories that were previously missing were imputed after completing Miceforest. There were 16.5% additional White records, 17.9% additional Black records, 15.9% additional Asian records, 27.7% additional Indigenous records, 26.0% additional Native Hawaiian or Pacific Islander records, 13.6% additional Multiracial records, and 15.1% additional Other records after imputations (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>780,339 ethnicity categories that were previously missing were imputed after completing Miceforest. There were 76.1% additional Hispanic or Latino records and 39.8% additional Not Hispanic or Latino records after imputations (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>After Miceforest, individual demographics remained proportional to the original dataset distributions (<xref ref-type="fig" rid="figure1">Figures 1</xref> and <xref ref-type="fig" rid="figure2">2</xref> and <xref ref-type="table" rid="table2">Table 2</xref>). A chi-square test illustrated that there was a statistically significant difference between the complete case and Miceforest estimates (<italic>P</italic>&#x003C;.001; <xref ref-type="table" rid="table3">Table 3</xref>). Overall computational time was 10 minutes.</p><p>For the 2021&#x2010;2022 flu season, the complete case analysis flu vaccine coverage rate was 26%. Using Miceforest, after imputations, West Virginia had an overall flu vaccine coverage rate of 19%. 
Flu vaccine coverage rates decreased when stratified by race and ethnicity when compared with a complete case analysis, with the most significant decreases observed in NH White (5%), NH Black (4%), Hispanic or Latino (7%), Asian (7%), Native Hawaiian or Pacific Islander (7%), and Other (18%; <xref ref-type="fig" rid="figure3">Figure 3</xref> and <xref ref-type="table" rid="table4">Table 4</xref>). After reconciling missing race and ethnicity, an additional 63,984 individuals were included in the analysis that were previously excluded from stratified analyses.</p><p>Likelihood ratio tests were used to determine the impact of race and ethnicity on vaccine uptake. Using Miceforest, race was significantly associated with flu vaccination status (G-statistic=26,891.53; <italic>P</italic>&#x003C;.001), demonstrating a large effect (<xref ref-type="table" rid="table5">Table 5</xref>). For ethnicity, the Miceforest model remained robust and was significantly associated with flu vaccination status (G-statistic=2185.01; <italic>P</italic>&#x003C;.001; <xref ref-type="table" rid="table5">Table 5</xref>).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>Our results highlight that the imputation method can profoundly change research findings. For race, all 3 methods show highly significant results and conclude that there are significant race disparities in flu vaccination after addressing missing data. G-tests performed on race and ethnicity and flu vaccination contingency tables showed consistent evidence of significant racial disparities across all imputation methods (<italic>P</italic>&#x003C;.001). However, for ethnicity, Iterative-Imputer failed to distinguish any significant disproportions, while MICE and Miceforest found strong associations.</p><p>Of the 3 methods used to address missing data, we found that MICE and Miceforest had the best model fit and effect sizes. 
MICE and Miceforest were able to reconcile missing data with little bias, produced more stable and reliable imputations, and retained better preservation of original data relationships, producing similar conclusions, subsequently allowing for more sophisticated post hoc analyses. Iterative-Imputer exhibited the most substantial deviations from the complete case analysis. While the majority of race and ethnicity categories retained proportions similar to those observed before imputation, the Black race category had a notable 142% increase post-Iterative-Imputer. Though all methods identify disparities for race, Iterative-Imputer suggests effects that are 5 times stronger than MICE and Miceforest (Iterative-Imputer G-statistic=128,280; MICE G-statistic=26,453; Miceforest G-statistic=26,892). Alternatively, Iterative-Imputer suggests no significant (<italic>P</italic>=.19) ethnicity disparities, while MICE and Miceforest distinguish strong disparities. Iterative-Imputer&#x2019;s results suggest that the model may be overfitting the data, intensifying present correlations, or generating false patterns in the imputed dataset. The significant change within a singular racial and ethnic group carries several potential implications: it may lead to neglecting crucial health disparities or greatly overvaluing disparities.</p><p>While MICE and Miceforest demonstrated more reliable and moderate effect sizes, the most striking difference between MICE and Miceforest was overall computational time. Large population surveillance-based datasets can be computationally expensive to impute [<xref ref-type="bibr" rid="ref50">50</xref>]. While traditional MICE and Miceforest were proportionally similar to the complete case analysis and had similar G-statistics, the computational load was a significant limitation for further epidemiological analyses. 
Since Stata 17 operates locally, executing MICE required approximately 14 hours, using between 92% and 98% of the total central processing unit (CPU) capacity of the local system. In contrast, as Iterative-Imputer and Miceforest were processed on a cloud-based drive, the total computational time was reduced to under 10 minutes. Cloud-based computing extends several advantages, including enhanced scalability, reduced dependence on local servers, and the capability to allocate computational jobs efficiently [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. As datasets become larger and more complex, more memory and more powerful CPUs become necessary, and consequently, there will be computational limitations. By offloading resource-intensive tasks to the cloud, researchers and public health officials can alleviate processing constraints, enabling parallel performance of multiple tasks and minimizing downtime. This improved efficiency not only accelerates analyses but also improves overall productivity by removing lengthy processing delays.</p><p>Compared with the complete case analysis, the imputed dataset was 45% larger, and the estimated 2021&#x2010;2022 flu vaccination coverage rate decreased from 26% to 19%. This resulted in distinct divergences in vaccination rates from those seen in the complete case analysis. Stratified flu vaccination coverage rates declined across all racial and ethnic categories following imputation. 
This is both expected, due to denominator inflation, and consistent with existing literature, which suggests that enhancements in data completeness often uncover underlying racial inequities [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref53">53</xref>].</p><p>In addition, the presence of the &#x201C;Other&#x201D; category may be artificially influencing the race missingness rate. Reports indicate that the increasing size of the &#x201C;Other&#x201D; category may be influenced by individuals choosing not to disclose their race, varying cultural perceptions of race, or vaccine providers selecting &#x201C;Other&#x201D; rather than leaving the race field blank [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. Those who select the &#x201C;Other&#x201D; category get combined into a single, dissimilar category that does not reflect accurate identities and makes results for this group difficult to accurately decipher.</p><p>These findings align with the broader imputation literature, which demonstrates that not only does reconciling missing race and ethnicity data reveal under-reported disparities, but that imputation methodology can significantly influence the magnitude of these inequities. Similar patterns have been observed in findings from Labgold et al [<xref ref-type="bibr" rid="ref2">2</xref>], Dorabawila et al [<xref ref-type="bibr" rid="ref42">42</xref>], and Russ et al [<xref ref-type="bibr" rid="ref41">41</xref>], where imputing missing race and ethnicity revealed greater disparities in health outcomes than previously understood, but the severity of these inequities fluctuated contingent upon parameters placed on the imputation method. 
This is consistent with theoretical bases proposing that the creation and assumptions of statistical models and algorithms can either dilute or intensify disparities, conditional on how missingness is managed and whose data are omitted [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. The absence of demographic data in IISs carries significant public health implications. Missing data limits our capability to correctly assess and monitor vaccination coverage, potentially leading to an incomplete or inaccurate understanding of disparities in vaccination rates among different vulnerable groups. Consequently, public health interventions and policies may be based on flawed assumptions, undermining their effectiveness. Moreover, the inability to identify and address disparities hinders efforts to promote equity within communities. Poor data quality can also result in inefficient allocation of resources, both human and financial, further exacerbating inequities. Finally, inadequate data can erode public trust in health systems, as decisions based on incomplete information may be perceived as unreliable or inequitable. However, Miceforest successfully imputed missing values in a large public health dataset in the most time-efficient manner, and therefore, we were able to mitigate potential bias and increase our statistical power in our analyses. By allowing more complete and representative datasets, Miceforest encourages more equitable vaccination surveillance and permits fuller public health decision-making in a timely manner. State public health departments can use this technique to augment incomplete data to identify vaccine coverage gaps more accurately, concentrate on programming and resource allocation in underserved communities, and assess whether interventions are aiding populations who need it most. 
Our findings emphasize how multiple imputation is not only a statistical solution but a means for progressing equity in immunization programs and interventions.</p><p>There were several limitations to our approach. Due to high levels of missingness of variables in the dataset, there were limited informative variables for the imputations. The WVSIIS population denominator is higher than the census population, which can skew flu vaccination rates lower than they are. Race and ethnicity are self-reported, and they may not reflect the reality of demographic distribution in the state, especially with growing nuances of racial and ethnic identity.</p><p>Data are the foundation of all effective public health interventions, and missing data can reduce our comprehension of vaccination coverage, recognizing disparities, and creating successful public health programs. Without properly addressing missing data, vulnerable groups continue to be underserved. As demonstrated, different methods for reconciling missing data result in different assumptions regarding the data and the process. However, using Miceforest to reconcile missing demographic data poses a potential solution, offering a flexible, fast, and iterative approach that can improve data completeness while preserving underlying distributions and mitigating potential bias.</p><p>Future efforts should validate imputation values against other population-based surveillance systems, such as census data and claims data. In addition, because our imputations focused on one state&#x2019;s IIS, supplementary efforts should examine the performance of various multiple imputation methods across more diverse populations and geographies and examine the optimal frequency for updating imputation models. Further, based on our results, we recommend several suggestions for enhancing both the epidemiological approach and practice in immunization surveillance. 
Though more resource-intensive, preventing missing data initially is the most effective measure to handle missing data, especially for surveillance data that necessitates system-wide resolutions. This can be achieved through standardized race and ethnicity data collection protocols and data collection training in state IISs. Imputation should be a temporary answer for a larger, systematic problem. Public health agencies should be transparent in the imputation methods used in reports and present both complete case and imputed estimates to demonstrate the impact of missing data on vaccine equity reports.</p></sec><sec id="s4-2"><title>Conclusions</title><p>Both MICE and Miceforest offer flexible and reliable approaches for imputing missing demographic data in IISs, outperforming Iterative-Imputer with regard to bias mitigation and distributional accuracy. These findings highlight that imputation method selection can greatly affect research outcomes, with repercussions for both statistical validity and public health decision-making and trust. While MICE and Miceforest yielded comparable effect sizes and preserved demographic proportions, MICE&#x2019;s substantial computational demands limit its scalability for large datasets, while Miceforest&#x2019;s ability to leverage cloud-based computing enhances efficiency by offloading resource-intensive tasks, enabling parallel execution, and minimizing processing delays.</p><p>Addressing missing data is both a methodological necessity and a public health imperative when handling large surveillance data. 
By improving data completeness and analytical precision, multiple imputation methods such as Miceforest can help reveal disparities, inform resource allocation, and promote equity in immunization and public health programs.</p></sec></sec></body><back><ack><p>The results presented in this manuscript represent the collaborative efforts across the West Virginia Department of Health and Human Services, the West Virginia Immunization Department, and STChealth, West Virginia Statewide Immunization Information System (WVSIIS). The authors would like to extend their gratitude to Sawyer Koops, MS, Ilyssa Simmons, MPH, Kyle Freese, PhD, and countless other state, contract, federal, and STChealth staff whose expertise, leadership, and dedication were instrumental in this effort.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to data in WVSIIS being confidential and exempt from public disclosure but are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ATSDR</term><def><p>Agency for Toxic Substances and Disease Registry</p></def></def-item><def-item><term id="abb2">BISG</term><def><p>Bayesian Improved Surname and Geocoding</p></def></def-item><def-item><term id="abb3">CDC</term><def><p>Centers for Disease Control and Prevention</p></def></def-item><def-item><term id="abb4">CPU</term><def><p>central processing unit</p></def></def-item><def-item><term id="abb5">FORHP</term><def><p>Federal Office of Rural Health Policy</p></def></def-item><def-item><term id="abb6">IIS</term><def><p>immunization information systems</p></def></def-item><def-item><term id="abb7">IRB</term><def><p>institutional review board</p></def></def-item><def-item><term id="abb8">MICE</term><def><p>multiple imputation using 
chained equations</p></def></def-item><def-item><term id="abb9">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb10">NH</term><def><p>non-Hispanic</p></def></def-item><def-item><term id="abb11">SVI</term><def><p>Social Vulnerability Index</p></def></def-item><def-item><term id="abb12">WVSIIS</term><def><p>West Virginia Statewide Immunization Information System</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Noppert</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Zalla</surname><given-names>LC</given-names> </name></person-group><article-title>Who counts and who gets counted? Health equity in infectious disease surveillance</article-title><source>Am J Public Health</source><year>2021</year><month>06</month><volume>111</volume><issue>6</issue><fpage>1004</fpage><lpage>1006</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2021.306249</pub-id><pub-id pub-id-type="medline">33950717</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Labgold</surname><given-names>K</given-names> </name><name name-style="western"><surname>Hamid</surname><given-names>S</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Estimating the unknown: greater racial and ethnic disparities in COVID-19 burden after accounting for missing race and ethnicity data</article-title><source>Epidemiology (Sunnyvale)</source><year>2021</year><month>03</month><day>1</day><volume>32</volume><issue>2</issue><fpage>157</fpage><lpage>161</lpage><pub-id pub-id-type="doi">10.1097/EDE.0000000000001314</pub-id><pub-id 
pub-id-type="medline">33323745</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kazeminia</surname><given-names>M</given-names> </name><name name-style="western"><surname>Afshar</surname><given-names>ZM</given-names> </name><name name-style="western"><surname>Rajati</surname><given-names>M</given-names> </name><name name-style="western"><surname>Saeedi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rajati</surname><given-names>F</given-names> </name></person-group><article-title>Evaluation of the acceptance rate of COVID-19 vaccine and its associated factors: a systematic review and meta-analysis</article-title><source>J Prev (2022)</source><year>2022</year><month>08</month><volume>43</volume><issue>4</issue><fpage>421</fpage><lpage>467</lpage><pub-id pub-id-type="doi">10.1007/s10935-022-00684-1</pub-id><pub-id pub-id-type="medline">35687259</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andersen</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Gloster</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Associations between COVID-19 vaccine uptake, race/ethnicity, and political party affiliation</article-title><source>J Behav Med</source><year>2023</year><month>06</month><volume>46</volume><issue>3</issue><fpage>525</fpage><lpage>531</lpage><pub-id pub-id-type="doi">10.1007/s10865-022-00379-2</pub-id><pub-id pub-id-type="medline">36417011</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Mahmud</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>LL</given-names> </name><etal/></person-group><article-title>Effect of race and ethnicity on influenza vaccine uptake among older US Medicare beneficiaries: a record-linkage cohort study</article-title><source>Lancet Healthy Longev</source><year>2021</year><month>03</month><volume>2</volume><issue>3</issue><fpage>e143</fpage><lpage>e153</lpage><pub-id pub-id-type="doi">10.1016/S2666-7568(20)30074-X</pub-id><pub-id pub-id-type="medline">36098112</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spangler</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Levy</surname><given-names>JI</given-names> </name><name name-style="western"><surname>Fabian</surname><given-names>MP</given-names> </name><etal/></person-group><article-title>Missing race and ethnicity data among COVID-19 cases in Massachusetts</article-title><source>J Racial Ethn Health Disparities</source><year>2023</year><month>08</month><volume>10</volume><issue>4</issue><fpage>2071</fpage><lpage>2080</lpage><pub-id pub-id-type="doi">10.1007/s40615-022-01387-3</pub-id><pub-id pub-id-type="medline">36056195</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krieger</surname><given-names>N</given-names> </name><name name-style="western"><surname>Testa</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hanage</surname><given-names>WP</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JT</given-names> </name></person-group><article-title>US racial and 
ethnic data for COVID-19 cases: still missing in action</article-title><source>The Lancet</source><year>2020</year><month>11</month><volume>396</volume><issue>10261</issue><fpage>e81</fpage><pub-id pub-id-type="doi">10.1016/S0140-6736(20)32220-0</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yoon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fuld</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Alternative methods for grouping race and ethnicity to monitor COVID-19 outcomes and vaccination coverage</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2021</year><month>08</month><day>13</day><volume>70</volume><issue>32</issue><fpage>1075</fpage><lpage>1080</lpage><pub-id pub-id-type="doi">10.15585/mmwr.mm7032a2</pub-id><pub-id pub-id-type="medline">34383729</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stokes</surname><given-names>EK</given-names> </name><name name-style="western"><surname>Zambrano</surname><given-names>LD</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>KN</given-names> </name><etal/></person-group><article-title>Coronavirus disease 2019 case surveillance - United States, January 22-May 30, 2020</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2020</year><month>06</month><day>19</day><volume>69</volume><issue>24</issue><fpage>759</fpage><lpage>765</lpage><pub-id pub-id-type="doi">10.15585/mmwr.mm6924e2</pub-id><pub-id pub-id-type="medline">32555134</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Mertens</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Crider</surname><given-names>YS</given-names> </name><etal/></person-group><article-title>Substantial underestimation of SARS-CoV-2 infection in the United States</article-title><source>Nat Commun</source><year>2020</year><month>09</month><day>9</day><volume>11</volume><issue>1</issue><fpage>4507</fpage><pub-id pub-id-type="doi">10.1038/s41467-020-18272-4</pub-id><pub-id pub-id-type="medline">32908126</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Hung</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Srivastav</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Surveillance of vaccination coverage among adult populations -United States, 2018</article-title><source>MMWR Surveill Summ</source><year>2021</year><month>05</month><day>14</day><volume>70</volume><issue>3</issue><fpage>1</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.15585/mmwr.ss7003a1</pub-id><pub-id pub-id-type="medline">33983910</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Valente</surname><given-names>TW</given-names> </name></person-group><article-title>Data collection and management</article-title><source>Evaluating Health Promotion Programs</source><year>2002</year><publisher-name>Oxford University Press</publisher-name><fpage>123</fpage><lpage>146</lpage></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Weston</surname><given-names>BW</given-names> </name></person-group><article-title>Blind spots: biases in prehospital race and ethnicity recording</article-title><source>Prehosp Emerg Care</source><year>2023</year><volume>27</volume><issue>8</issue><fpage>1072</fpage><lpage>1075</lpage><pub-id pub-id-type="doi">10.1080/10903127.2023.2175089</pub-id><pub-id pub-id-type="medline">36735657</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sholle</surname><given-names>ET</given-names> </name><name name-style="western"><surname>Pinheiro</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Adekkanattu</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Underserved populations with missing race ethnicity data differ significantly from those with structured race/ethnicity documentation</article-title><source>J Am Med Inform Assoc</source><year>2019</year><month>08</month><day>1</day><volume>26</volume><issue>8-9</issue><fpage>722</fpage><lpage>729</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz040</pub-id><pub-id pub-id-type="medline">31329882</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Multiple imputations in sample surveys&#x2014;a phenomenological Bayesian approach to nonresponse</article-title><conf-name>Proceedings of the Survey Research Methods Section of the American Statistical Association</conf-name><conf-date>1978</conf-date><fpage>20</fpage><lpage>28</lpage></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Bayesian inference for causal effects: the role of randomization</article-title><source>Ann Statist</source><year>1978</year><volume>6</volume><issue>1</issue><pub-id pub-id-type="doi">10.1214/aos/1176344064</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Inference and missing data</article-title><source>Biometrika</source><year>1976</year><month>12</month><volume>63</volume><issue>3</issue><fpage>581</fpage><pub-id pub-id-type="doi">10.2307/2335739</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Multiple Imputation After 18+ Years</article-title><source>J Am Stat Assoc</source><year>1996</year><month>06</month><volume>91</volume><issue>434</issue><fpage>473</fpage><pub-id pub-id-type="doi">10.2307/2291635</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Raghunathan</surname><given-names>TE</given-names> </name><name name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Large-sample significance levels from multiply imputed data using moment-based statistics and an F reference distribution</article-title><source>J Am Stat Assoc</source><year>1991</year><month>12</month><volume>86</volume><issue>416</issue><fpage>1065</fpage><pub-id 
pub-id-type="doi">10.2307/2290525</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meng</surname><given-names>XL</given-names> </name><name name-style="western"><surname>Rubin</surname><given-names>DB</given-names> </name></person-group><article-title>Performing likelihood ratio tests with multiply-imputed data sets</article-title><source>Biometrika</source><year>1992</year><volume>79</volume><issue>1</issue><fpage>103</fpage><lpage>111</lpage><pub-id pub-id-type="doi">10.1093/biomet/79.1.103</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chan</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>XL</given-names> </name></person-group><article-title>Multiple improvements of multiple imputation likelihood ratio tests</article-title><source>STAT SINICA</source><year>2022</year><pub-id pub-id-type="doi">10.5705/ss.202019.0314</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chan</surname><given-names>KW</given-names> </name></person-group><article-title>General and feasible tests with multiply-imputed datasets</article-title><source>Ann Statist</source><year>2022</year><volume>50</volume><issue>2</issue><pub-id pub-id-type="doi">10.1214/21-AOS2132</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Azur</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Stuart</surname><given-names>EA</given-names> </name><name 
name-style="western"><surname>Frangakis</surname><given-names>C</given-names> </name><name name-style="western"><surname>Leaf</surname><given-names>PJ</given-names> </name></person-group><article-title>Multiple imputation by chained equations: what is it and how does it work?</article-title><source>Int J Methods Psychiatr Res</source><year>2011</year><month>03</month><volume>20</volume><issue>1</issue><fpage>40</fpage><lpage>49</lpage><pub-id pub-id-type="doi">10.1002/mpr.329</pub-id><pub-id pub-id-type="medline">21499542</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bouhlila</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Sellaouti</surname><given-names>F</given-names> </name></person-group><article-title>Multiple imputation using chained equations for missing data in TIMSS: a case study</article-title><source>Large-scale Assess Educ</source><year>2013</year><month>12</month><volume>1</volume><issue>1</issue><pub-id pub-id-type="doi">10.1186/2196-0739-1-4</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>T</given-names> </name></person-group><article-title>Application of machine learning missing data imputation techniques in clinical decision making: taking the discharge assessment of patients with spontaneous supratentorial intracerebral hemorrhage as an example</article-title><source>BMC Med Inform Decis 
Mak</source><year>2022</year><month>01</month><day>13</day><volume>22</volume><issue>1</issue><fpage>13</fpage><pub-id pub-id-type="doi">10.1186/s12911-022-01752-6</pub-id><pub-id pub-id-type="medline">35027065</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Getz</surname><given-names>K</given-names> </name><name name-style="western"><surname>Hubbard</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Linn</surname><given-names>KA</given-names> </name></person-group><article-title>Performance of multiple imputation using modern machine learning methods in electronic health records data</article-title><source>Epidemiology (Sunnyvale)</source><year>2023</year><month>03</month><day>1</day><volume>34</volume><issue>2</issue><fpage>206</fpage><lpage>215</lpage><pub-id pub-id-type="doi">10.1097/EDE.0000000000001578</pub-id><pub-id pub-id-type="medline">36722803</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Emmanuel</surname><given-names>T</given-names> </name><name name-style="western"><surname>Maupong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mpoeleng</surname><given-names>D</given-names> </name><name name-style="western"><surname>Semong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mphago</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tabona</surname><given-names>O</given-names> </name></person-group><article-title>A survey on missing data in machine learning</article-title><source>J Big Data</source><year>2021</year><volume>8</volume><issue>1</issue><fpage>140</fpage><pub-id pub-id-type="doi">10.1186/s40537-021-00516-9</pub-id><pub-id 
pub-id-type="medline">34722113</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Malin</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name></person-group><article-title>Predicting missing values in medical data via XGBoost regression</article-title><source>J Healthc Inform Res</source><year>2020</year><month>12</month><volume>4</volume><issue>4</issue><fpage>383</fpage><lpage>394</lpage><pub-id pub-id-type="doi">10.1007/s41666-020-00077-1</pub-id><pub-id pub-id-type="medline">33283143</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Schwenker</surname><given-names>F</given-names> </name></person-group><article-title>Early prediction of sepsis based on machine learning algorithm</article-title><source>Comput Intell Neurosci</source><year>2021</year><volume>2021</volume><issue>1</issue><fpage>6522633</fpage><pub-id pub-id-type="doi">10.1155/2021/6522633</pub-id><pub-id pub-id-type="medline">34675971</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Shao</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>W</given-names> </name></person-group><article-title>Coal and gas outburst prediction model based on miceforest filling and PHHO&#x2013;KELM</article-title><source>Processes (Basel)</source><year>2023</year><volume>11</volume><issue>9</issue><fpage>2722</fpage><pub-id pub-id-type="doi">10.3390/pr11092722</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>AnotherSamWilson</article-title><source>miceforest: Fast, Memory Efficient Imputation with LightGBM</source><year>2020</year><month>08</month><day>30</day><access-date>2025-08-19</access-date><publisher-name>GitHub</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/AnotherSamWilson/miceforest">https://github.com/AnotherSamWilson/miceforest</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pham</surname><given-names>HT</given-names> </name><name name-style="western"><surname>Do</surname><given-names>T</given-names> </name><name name-style="western"><surname>Baek</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Handling missing data in COVID-19 incidence estimation: secondary data analysis</article-title><source>JMIR Public Health Surveill</source><year>2024</year><month>08</month><day>20</day><volume>10</volume><fpage>e53719</fpage><pub-id pub-id-type="doi">10.2196/53719</pub-id><pub-id pub-id-type="medline">39166439</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Hategeka</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gr&#x00E9;pin</surname><given-names>KA</given-names> </name></person-group><article-title>Addressing missing values in routine health information system data: an evaluation of imputation methods using data from the Democratic Republic of the Congo during the COVID-19 pandemic</article-title><source>Popul Health Metr</source><year>2021</year><month>11</month><day>4</day><volume>19</volume><issue>1</issue><fpage>44</fpage><pub-id pub-id-type="doi">10.1186/s12963-021-00274-z</pub-id><pub-id pub-id-type="medline">34736462</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Branham</surname><given-names>DK</given-names> </name><name name-style="western"><surname>Finegold</surname><given-names>K</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Trends in missing race and ethnicity information after imputation in HealthCare.gov marketplace enrollment data, 2015-2021</article-title><source>JAMA Netw Open</source><year>2022</year><month>06</month><day>1</day><volume>5</volume><issue>6</issue><fpage>e2216715</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2022.16715</pub-id><pub-id pub-id-type="medline">35687340</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scroggins</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Hulchafo</surname><given-names>II</given-names> </name><name name-style="western"><surname>Topaz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cato</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Barcelona</surname><given-names>V</given-names> </name></person-group><article-title>Addressing bias in preterm birth research: the role of advanced imputation techniques for missing race and ethnicity in perinatal health data</article-title><source>Ann Epidemiol</source><year>2024</year><month>06</month><volume>94</volume><fpage>120</fpage><lpage>126</lpage><pub-id pub-id-type="doi">10.1016/j.annepidem.2024.05.003</pub-id><pub-id pub-id-type="medline">38734192</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sorbero</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Euller</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kofner</surname><given-names>A</given-names> </name><name name-style="western"><surname>Elliott</surname><given-names>MN</given-names> </name></person-group><article-title>Imputation of race and ethnicity in health insurance marketplace enrollment data, 2015-2022 open enrollment periods</article-title><source>Rand Health Q</source><year>2022</year><month>11</month><volume>10</volume><issue>1</issue><fpage>4</fpage><pub-id pub-id-type="doi">10.7249/RRA1853-1</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elliott</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Morrison</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Fremont</surname><given-names>A</given-names> </name><name name-style="western"><surname>McCaffrey</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Pantoja</surname><given-names>P</given-names> </name><name name-style="western"><surname>Lurie</surname><given-names>N</given-names> 
</name></person-group><article-title>Using the census bureau&#x2019;s surname list to improve estimates of race/ethnicity and associated disparities</article-title><source>Health Serv Outcomes Res Method</source><year>2009</year><month>06</month><volume>9</volume><issue>2</issue><fpage>69</fpage><lpage>83</lpage><pub-id pub-id-type="doi">10.1007/s10742-009-0047-1</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kozlowski</surname><given-names>D</given-names> </name><name name-style="western"><surname>Murray</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Bell</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Avoiding bias when inferring race using name-based approaches</article-title><source>PLoS ONE</source><year>2022</year><volume>17</volume><issue>3</issue><fpage>e0264270</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0264270</pub-id><pub-id pub-id-type="medline">35231059</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kissin</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Liao</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>DeSantis</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Yartel</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Gutman</surname><given-names>R</given-names> </name></person-group><article-title>Multiple imputation of missing race/ethnicity information in the national assisted reproductive technology surveillance system</article-title><source>J Womens Health 
(Larchmt)</source><year>2024</year><month>03</month><volume>33</volume><issue>3</issue><fpage>328</fpage><lpage>338</lpage><pub-id pub-id-type="doi">10.1089/jwh.2023.0267</pub-id><pub-id pub-id-type="medline">38112534</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Rose</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Multiple imputation of missing race and ethnicity in CDC COVID-19 case-level surveillance data</article-title><source>Int J Stat Med Res</source><year>2022</year><month>01</month><day>28</day><volume>11</volume><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.6000/1929-6029.2022.11.01</pub-id><pub-id pub-id-type="medline">35368775</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Russ</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bramley</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Boyce</surname><given-names>I</given-names> </name></person-group><article-title>Bolstering the measurement of racial inequity of COVID-19 vaccine uptake</article-title><source>Vaccines (Basel)</source><year>2023</year><month>04</month><day>21</day><volume>11</volume><issue>4</issue><fpage>876</fpage><pub-id pub-id-type="doi">10.3390/vaccines11040876</pub-id><pub-id pub-id-type="medline">37112788</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Dorabawila</surname><given-names>V</given-names> </name><name name-style="western"><surname>Hoen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hoefer</surname><given-names>D</given-names> </name></person-group><article-title>Leveraging multiple administrative data sources to reduce missing race and ethnicity data: a descriptive epidemiology cross-sectional study of COVID-19 case relative rates</article-title><source>J Racial Ethn Health Disparities</source><year>2024</year><month>10</month><day>22</day><volume>22</volume><pub-id pub-id-type="doi">10.1007/s40615-024-02211-w</pub-id><pub-id pub-id-type="medline">39436568</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>How we define rural</article-title><source>Health Resources &#x0026; Services Administration</source><year>2025</year><month>02</month><access-date>2025-06-05</access-date><publisher-name>HRSA</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.hrsa.gov/rural-health/about-us/what-is-rural">https://www.hrsa.gov/rural-health/about-us/what-is-rural</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><article-title>West Virginia: 2020 Census</article-title><source>United States Census Bureau</source><year>2021</year><month>08</month><day>25</day><access-date>2025-06-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.census.gov/library/stories/state-by-state/west-virginia.html">https://www.census.gov/library/stories/state-by-state/west-virginia.html</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="web"><article-title>CDC/ATSDR Social Vulnerability Index 2020 Database West Virginia</article-title><source>Agency for Toxic Substances and Disease Registry. Geospatial Research, Analysis, and Services Program. 
Centers for Disease Control and Prevention</source><access-date>2025-06-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.atsdr.cdc.gov/placeandhealth/svi/data_documentation_download.html">https://www.atsdr.cdc.gov/placeandhealth/svi/data_documentation_download.html</ext-link></comment></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="web"><article-title>Iterativeimputer</article-title><source>scikit-learn</source><access-date>2024-11-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://scikit-learn.org/stable/modules/generated/sklearn.impute.IterativeImputer.html#sklearn.impute.IterativeImputer">https://scikit-learn.org/stable/modules/generated/sklearn.impute.IterativeImputer.html#sklearn.impute.IterativeImputer</ext-link></comment></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Raghunathan</surname><given-names>TE</given-names> </name><name name-style="western"><surname>Solenberger</surname><given-names>PW</given-names> </name><name name-style="western"><surname>Hoewyk</surname><given-names>J</given-names> </name></person-group><source>IVEware imputation and Variance Estimation Software User Guide</source><access-date>2023-01-04</access-date><publisher-name>Survey Research Center, Institute for Social Research University of Michigan</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.src.isr.umich.edu/wp-content/uploads/iveware/v0.1/Documentation/ive_user.pdf">https://www.src.isr.umich.edu/wp-content/uploads/iveware/v0.1/Documentation/ive_user.pdf</ext-link></comment></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Graham</surname><given-names>JW</given-names> </name><name 
name-style="western"><surname>Olchowski</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Gilreath</surname><given-names>TD</given-names> </name></person-group><article-title>How many imputations are really needed? Some practical clarifications of multiple imputation theory</article-title><source>Prev Sci</source><year>2007</year><month>09</month><volume>8</volume><issue>3</issue><fpage>206</fpage><lpage>213</lpage><pub-id pub-id-type="doi">10.1007/s11121-007-0070-9</pub-id><pub-id pub-id-type="medline">17549635</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="web"><article-title>IIS policies: West Virginia</article-title><source>Centers for Disease Control and Prevention</source><year>2024</year><month>08</month><day>9</day><access-date>2025-07-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/iis/policy-legislation/west-virginia.html">https://www.cdc.gov/iis/policy-legislation/west-virginia.html</ext-link></comment></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-Jumaili</surname><given-names>AHA</given-names> </name><name name-style="western"><surname>Muniyandi</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Hasan</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Paw</surname><given-names>JKS</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>MJ</given-names> </name></person-group><article-title>Big data analytics using cloud computing based frameworks for power management systems: status, constraints, and future recommendations</article-title><source>Sensors (Basel)</source><year>2023</year><month>03</month><day>8</day><volume>23</volume><issue>6</issue><fpage>2952</fpage><pub-id pub-id-type="doi">10.3390/s23062952</pub-id><pub-id 
pub-id-type="medline">36991663</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lebeda</surname><given-names>FJ</given-names> </name><name name-style="western"><surname>Zalatoris</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Scheerer</surname><given-names>JB</given-names> </name></person-group><article-title>Government cloud computing policies: potential opportunities for advancing military biomedical research</article-title><source>Mil Med</source><year>2018</year><month>11</month><day>1</day><volume>183</volume><issue>11-12</issue><fpage>e438</fpage><lpage>e447</lpage><pub-id pub-id-type="doi">10.1093/milmed/usx114</pub-id><pub-id pub-id-type="medline">29425378</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmadi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aslani</surname><given-names>N</given-names> </name></person-group><article-title>Capabilities and advantages of cloud computing in the implementation of electronic health record</article-title><source>Acta Inform Med</source><year>2018</year><volume>26</volume><issue>1</issue><fpage>24</fpage><lpage>28</lpage><pub-id pub-id-type="doi">10.5455/aim.2018.26.24-28</pub-id><pub-id pub-id-type="medline">29719309</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Coelho</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rocha</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hone</surname><given-names>T</given-names> </name></person-group><article-title>Improvements in data completeness in health information 
systems reveal racial inequalities: longitudinal national data from hospital admissions in Brazil 2010-2022</article-title><source>Int J Equity Health</source><year>2024</year><month>07</month><day>18</day><volume>23</volume><issue>1</issue><fpage>143</fpage><pub-id pub-id-type="doi">10.1186/s12939-024-02214-3</pub-id><pub-id pub-id-type="medline">39026324</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holloway</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Radack</surname><given-names>J</given-names> </name><name name-style="western"><surname>Barreto</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The &#x201C;other&#x201D; race category on birth certificates and its impact on analyses of preterm birth inequity</article-title><source>J Perinatol</source><year>2025</year><month>03</month><volume>45</volume><issue>3</issue><fpage>372</fpage><lpage>377</lpage><pub-id pub-id-type="doi">10.1038/s41372-024-02123-x</pub-id><pub-id pub-id-type="medline">39304729</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Woolverton</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Marks</surname><given-names>AK</given-names> </name></person-group><article-title>&#x201C;I just check &#x2018;other&#x2019;&#x201D;: evidence to support expanding the measurement inclusivity and equity of ethnicity/race and cultural identifications of US adolescents</article-title><source>Cultur Divers Ethnic Minor Psychol</source><year>2023</year><month>01</month><volume>29</volume><issue>1</issue><fpage>64</fpage><lpage>73</lpage><pub-id pub-id-type="doi">10.1037/cdp0000360</pub-id><pub-id 
pub-id-type="medline">34351178</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ansari</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hart-Malloy</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rosenberg</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Trigg</surname><given-names>M</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>EG</given-names> </name></person-group><article-title>Modeling the potential impact of missing race and ethnicity data in infectious disease surveillance systems on disparity measures: scenario analysis of different imputation strategies</article-title><source>JMIR Public Health Surveill</source><year>2022</year><month>11</month><day>9</day><volume>8</volume><issue>11</issue><fpage>e38037</fpage><pub-id pub-id-type="doi">10.2196/38037</pub-id><pub-id pub-id-type="medline">36350701</pub-id></nlm-citation></ref></ref-list></back></article>