<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i7e32164</article-id>
      <article-id pub-id-type="pmid">35476722</article-id>
      <article-id pub-id-type="doi">10.2196/32164</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Explaining the Varying Patterns of COVID-19 Deaths Across the United States: 2-Stage Time Series Clustering Framework</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Sanchez</surname>
            <given-names>Travis</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Couture</surname>
            <given-names>Alexia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Snider</surname>
            <given-names>Dallas</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Megahed</surname>
            <given-names>Fadel M</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2194-5110</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Jones-Farmer</surname>
            <given-names>L Allison</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1529-1133</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ma</surname>
            <given-names>Yinjiao</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1742-2026</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Rigdon</surname>
            <given-names>Steven E</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Epidemiology and Biostatistics</institution>
            <institution>College for Public Health and Social Justice</institution>
            <institution>Saint Louis University</institution>
            <addr-line>3545 Lafayette Ave</addr-line>
            <addr-line>St Louis, MO, 63104</addr-line>
            <country>United States</country>
            <phone>1 3149772781</phone>
            <email>steve.rigdon@slu.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7668-0899</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Farmer School of Business</institution>
        <institution>Miami University</institution>
        <addr-line>Oxford, OH</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Epidemiology and Biostatistics</institution>
        <institution>College for Public Health and Social Justice</institution>
        <institution>Saint Louis University</institution>
        <addr-line>St Louis, MO</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Steven E Rigdon <email>steve.rigdon@slu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>19</day>
        <month>7</month>
        <year>2022</year>
      </pub-date>
      <volume>8</volume>
      <issue>7</issue>
      <elocation-id>e32164</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>26</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Fadel M Megahed, L Allison Jones-Farmer, Yinjiao Ma, Steven E Rigdon. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 19.07.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2022/7/e32164" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Socially vulnerable communities are at increased risk for adverse health outcomes during a pandemic. Although this association has been established for H1N1, Middle East respiratory syndrome (MERS), and COVID-19 outbreaks, understanding the factors influencing the outbreak pattern for different communities remains limited.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our 3 objectives are to determine how many distinct clusters of time series there are for COVID-19 deaths in 3108 contiguous counties in the United States, how the clusters are geographically distributed, and what factors influence the probability of cluster membership.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We proposed a 2-stage data analytic framework that can account for different levels of temporal aggregation for the pandemic outcomes and community-level predictors. Specifically, we used time-series clustering to identify clusters with similar outcome patterns for the 3108 contiguous US counties. Multinomial logistic regression was used to explain the relationship between community-level predictors and cluster assignment. We analyzed county-level confirmed COVID-19 deaths from Sunday, March 1, 2020, to Saturday, February 27, 2021.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Four distinct patterns of deaths were observed across the contiguous US counties. The multinomial regression model correctly classified 1904 (61.25%) of the counties’ outbreak patterns/clusters.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our results provide evidence that county-level patterns of COVID-19 deaths are different and can be explained in part by social and political predictors.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>explanatory modeling</kwd>
        <kwd>multinomial regression</kwd>
        <kwd>SARS-CoV-2</kwd>
        <kwd>COVID-19</kwd>
        <kwd>socioeconomic analyses</kwd>
        <kwd>time series analysis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>A geographically, politically, and socioeconomically diverse nation, the United States consists of 50 states, 48 of which are contiguous. When considering the COVID-19 pandemic in different regions throughout the United States, different patterns of outcomes emerge. Based on data obtained from the open source COVID-19 data hub [<xref ref-type="bibr" rid="ref1">1</xref>], <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the national 7-day moving average of deaths as well as the various patterns that arise among 8 example counties from Sunday, March 1, 2020, to Saturday, February 27, 2021. For example, New York, NY, experienced a large first wave of deaths, followed by a relatively low death count through the remainder of the study. Nearby Ocean County, NJ, a populous county near the New Jersey shore, had a large first wave of deaths, followed by a second wave beginning in late 2020. In contrast, Butler County, OH, a populous midwestern county, showed low death counts until late in the study period. None of these patterns mimics the overall pattern for the aggregate death counts in the United States.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Time series profiles of the 7-day moving average of new COVID-19 deaths for the entire United States and 8 sample counties.</p>
        </caption>
        <graphic xlink:href="publichealth_v8i7e32164_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>Early in the COVID-19 pandemic, the county-level population mortality and case fatality rates were significantly different among the US regions [<xref ref-type="bibr" rid="ref2">2</xref>]. Explanations for regional differences in health outcomes related to COVID-19 may be the structure of the government and policy making within the United States as it relates to the social vulnerability of the population. In the United States, each state consists of county governments that set health and economic policies for local communities. The counties within the states vary in terms of population size, demographics, access to health care, housing, and transportation. Some have noted that the regional differences in COVID-19 policies, compliance, and subsequent outcomes could be due to political differences across the regions. Gollwitzer et al [<xref ref-type="bibr" rid="ref3">3</xref>] showed Republican-leaning counties displayed less physical distancing compared to Democratic-leaning counties and a subsequent increase in COVID-19 cases and deaths. Another study showed Democratic governors were 50% more likely to implement stay-at-home orders [<xref ref-type="bibr" rid="ref4">4</xref>], which have been associated with increased physical distancing and reduction in COVID-19 cases and deaths [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      <p>Here, we investigate the regional patterns in deaths attributed to COVID-19. The phenomenon of differing national and regional patterns within the United States was illustrated for confirmed COVID-19 cases in Megahed et al [<xref ref-type="bibr" rid="ref6">6</xref>]. In addition, a report by the <italic>Financial Times</italic> [<xref ref-type="bibr" rid="ref7">7</xref>] argued, “Across the world, public health data are gathered at a very local level before aggregation into regional and national figures.... While useful as a summary, local distinctions get lost, painting a misleading image of whole countries being affected uniformly.” In this study, we investigated the various patterns of COVID-19 deaths across 3108 contiguous counties in the United States. We also sought to determine what factors relate to the pattern of deaths. Specifically, we posed 3 questions:</p>
      <list list-type="bullet">
        <list-item>
          <p>How many distinct clusters of counties in the United States exhibit similar time series patterns in the deaths due to COVID-19?</p>
        </list-item>
        <list-item>
          <p>How are these clusters geographically distributed across the United States?</p>
        </list-item>
        <list-item>
          <p>Are certain geographic, political, government, and social vulnerability variables associated with the patterns of COVID-19–related deaths?</p>
        </list-item>
      </list>
      <p>To address the first question, we performed a cluster analysis on the time series of the 3108 US counties. We provided maps to show the geographic distribution of the clusters. To address the third question, we applied a multinomial logistic regression analysis using geographic, political, and social vulnerability data to explain the patterns of deaths due to COVID-19 over time.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>This study was conducted in 3 stages: (1) data gathering and preprocessing, (2) time series clustering, and (3) modeling and cluster validation.</p>
      <sec>
        <title>Data</title>
        <p>The open source COVID-19 data hub [<xref ref-type="bibr" rid="ref1">1</xref>] was used to extract county-level time series data related to confirmed COVID-19 deaths from Sunday, March 1, 2020, to Saturday, February 27, 2021. Data were extracted from 3108 counties in the 48 contiguous US states and were completely anonymous. This data set was used to compute the daily confirmed deaths related to COVID-19 by county and contained the sole data used to inform the time series cluster analysis.</p>
        <p>To develop the explanatory model describing the clusters, the following additional variables were gathered: region, governor's party affiliation, government response, the Centers for Disease Control and Prevention’s (CDC) social vulnerability index (SVI), and population density.</p>
        <sec>
          <title>Region</title>
          <p>The CDC produces a 10-region Framework for Chronic Disease Prevention and Health Promotion [<xref ref-type="bibr" rid="ref8">8</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the 10 regions used in our explanatory model. The CDC's National Center for Chronic Disease Prevention and Health Promotion (NCCDPHP) developed these regions to promote consistency in technical assistance and communications for chronic disease prevention [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>The 10 CDC regions. CDC: Centers for Disease Control and Prevention.</p>
            </caption>
            <graphic xlink:href="publichealth_v8i7e32164_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Governor's Party Affiliation</title>
          <p>The political party affiliation of each US state governor (within the 48 contiguous US states) at the start of the pandemic (March 2020) was determined. Since the District of Columbia does not have a governor, the political party of the mayor (Democrat) was used. The party affiliation of the governor was used as this affects the political actions and policies taken, often in the form of executive orders from the governor, during the pandemic [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        </sec>
        <sec>
          <title>Government Response</title>
          <p>The overall government response index (at the US state level) from the Blavatnik School of Government [<xref ref-type="bibr" rid="ref9">9</xref>] was downloaded on March 16, 2021. The index considers containment and closure indicators, such as school and workplace closings; economic response, such as income support and debt relief; and health systems, such as testing policies, contact tracing, and investment in vaccines. Higher values of the government response index indicate a stronger government response related to the pandemic. This index changed over the course of the study period. To capture the index over the majority of the study period, we summarized the index using the median value over the study period. Details of the methodology used to compute the index can be found at Oxford University COVID-19 Tracker GitHub [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        </sec>
        <sec>
          <title>The Social Vulnerability Index</title>
          <p>The CDC’s SVI is computed by the CDC's Agency for Toxic Substances and Disease Registry's Geospatial Research, Analysis, and Services Program [<xref ref-type="bibr" rid="ref11">11</xref>]. The SVI provides the relative vulnerability of each US county based on US Census data and is ranked on 15 social factors, including unemployment, minority status, and disability. Note that the SVI data from the CDC returned results for 3107 counties, with no data on Rio Arriba County, New Mexico, and hence this county was excluded from our explanatory analysis. The SVI data were grouped into the following 4 themes:</p>
          <list list-type="bullet">
            <list-item>
              <p>SVI theme 1: socioeconomic</p>
            </list-item>
            <list-item>
              <p>SVI theme 2: household composition and disability</p>
            </list-item>
            <list-item>
              <p>SVI theme 3: minority status and language</p>
            </list-item>
            <list-item>
              <p>SVI theme 4: housing and transportation</p>
            </list-item>
          </list>
          <p>Our study included each of the 4 SVI themes. To construct the SVI for each theme, the percentile rank for each variable across the counties was computed. These were summed across the themes and then ranked within each domain. The SVIs ranged from 0 to 1, with higher values of SVIs for a particular theme indicating a higher level of social vulnerability. For more details on the SVI, see Flanagan et al [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
        </sec>
        <sec>
          <title>Population Density</title>
          <p>The population density in each county was computed based on the land area in square miles and the 2014-2018 American Community Survey (ACS) population estimates in each county. Both land area and population estimate variables were obtained from the CDC’s SVI 2018 data set [<xref ref-type="bibr" rid="ref11">11</xref>]. Due to right-skewness in this variable, the natural logarithm of population density was used in the analysis.</p>
        </sec>
      </sec>
      <sec>
        <title>Time Series Clustering</title>
        <p>Time series cluster analysis was based solely on the daily confirmed deaths related to COVID-19 by county. The goal was to separate counties into groups (clusters) that show similar time series patterns. There are 3 important decisions that affect the cluster solution: (1) the scaling of the data, (2) the measure of distance between the clusters, and (3) the clustering algorithm. Liao [<xref ref-type="bibr" rid="ref13">13</xref>] gives an overview of time series clustering methods.</p>
        <p>For this study, the daily confirmed deaths related to COVID-19 by county were smoothed using a 7-day moving average to account for weekly patterns due to reporting. Moreover, the 7-day moving averages were rescaled so that all values fell between 0 and 1 to focus on the pattern of the progression of the deaths rather than the magnitude of the death counts. The magnitude of the death counts in each county depends on many factors, such as county size, population density, and region. The scaled 7-day moving average for county i at time t is</p>
        <p>
          <disp-formula>
            <graphic xlink:href="publichealth_v8i7e32164_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>where <italic>MA7<sub>i,t</sub></italic> is the 7-day moving average of deaths related to COVID-19 for county <italic>i</italic> at time <italic>t</italic>. The maximum in the denominator is taken over all time, <italic>0≤t≤T</italic>. The outer maximum function in Equation (1) is used to account for reporting adjustments that occur with negative death counts on some days.</p>
        <p>For illustration, suppose that county <italic>i</italic> recorded deaths only on days 7, 8, and 9, when, respectively, 7, 21, and 14 deaths occurred. On all other days, no deaths were recorded. For clarity, this sequence of death counts, the calculations of the 7-day moving averages (<italic>MA7<sub>i,t</sub></italic>), and the scaled moving averages (<inline-graphic xlink:href="publichealth_v8i7e32164_fig8.png" xlink:type="simple" mimetype="image"/>) for the first 17 days are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>This method of scaling the 7-day moving averages ensured that we evaluated the shape of the death profile for each county across time.</p>
        <p>Many metrics can be used to measure the distance between time series, including Euclidean distance, dynamic time warping [<xref ref-type="bibr" rid="ref14">14</xref>], and the Pearson correlation coefficient. An elastic measure, such as dynamic time warping, is commonly used with time series clustering [<xref ref-type="bibr" rid="ref13">13</xref>] because it aligns or <italic>warps</italic> the time series so that the distance between them is minimized. Elastic measures such as this do not preserve the timing of the outbreak and deaths in a meaningful way. For this reason, we used the Euclidean distance to measure the distance between the time series clusters. In our case, the Euclidean distance between 2 death profiles of length <italic>T</italic> was</p>
        <p>
          <disp-formula>
            <graphic xlink:href="publichealth_v8i7e32164_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>There are numerous clustering algorithms that have been suggested for time series clustering [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. We used <italic>k</italic>-means clustering for this analysis. A heuristic-based method of clustering, <italic>k</italic>-means clustering partitions <italic>n</italic> objects into <italic>k≤n</italic> mutually exclusive clusters and each cluster is represented by the most centrally located object in the cluster. One limitation of the <italic>k</italic>-means clustering approach is that the number of clusters must be determined a priori in order to obtain a solution. It is common practice in exploratory research to evaluate cluster solutions for several sizes of <italic>k</italic> and select the <italic>best</italic> based on measures of cluster validity or homogeneity [<xref ref-type="bibr" rid="ref16">16</xref>]. The R package <italic>NbClust</italic> [<xref ref-type="bibr" rid="ref17">17</xref>] can be used to compute up to 30 cluster validity indices for cluster solutions of several sizes, <italic>k</italic>. This approach provides a systematic, data-driven method for selecting the optimal number of clusters in a data set without capitalizing on a single validity measure. For this analysis, <italic>k</italic>-means clustering was used to find the cluster solutions and the <italic>NbClust</italic> package was used to determine the optimal number of clusters to retain.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Example calculation of the scaled 7-day moving averages (<inline-graphic xlink:href="publichealth_v8i7e32164_fig8.png" xlink:type="simple" mimetype="image"/>).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <thead>
              <tr valign="top">
                <td>Time </td>
                <td>1</td>
                <td>2</td>
                <td>3</td>
                <td>4</td>
                <td>5</td>
                <td>6</td>
                <td>7</td>
                <td>8</td>
                <td>9</td>
                <td>10</td>
                <td>11</td>
                <td>12</td>
                <td>13</td>
                <td>14</td>
                <td>15</td>
                <td>16</td>
                <td>17</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Deaths</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>7</td>
                <td>21</td>
                <td>14</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>MA7<sub>i,t</sub></italic>
                </td>
                <td>N/A<sup>a</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>1</td>
                <td>4</td>
                <td>6</td>
                <td>6</td>
                <td>6</td>
                <td>6</td>
                <td>6</td>
                <td>5</td>
                <td>2</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <inline-graphic xlink:href="publichealth_v8i7e32164_fig8.png" xlink:type="simple" mimetype="image"/>
                </td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>1/6</td>
                <td>4/6</td>
                <td>1</td>
                <td>1</td>
                <td>1</td>
                <td>1</td>
                <td>1</td>
                <td>5/6</td>
                <td>2/6</td>
                <td>0</td>
                <td>0</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Explanatory Modeling</title>
        <p>The time series clustering method described before resulted in mutually exclusive clusters of time series profiles containing counties with similar patterns in the daily deaths related to COVID-19. To further validate the cluster solution and to explain the differences in the progression of daily deaths across the counties, a multinomial regression analysis [<xref ref-type="bibr" rid="ref18">18</xref>] was fit using the explanatory variables described in the Data section. The <italic>multinom</italic> function from the R package <italic>nnet</italic> [<xref ref-type="bibr" rid="ref19">19</xref>] was used for this analysis.</p>
        <p>Model performance was evaluated in terms of the ability to meaningfully interpret the model coefficients and by evaluating the in-sample classification performance. Specifically, the model-predicted cluster was compared to the cluster as determined by the time series cluster solution for each county. The in-sample classification performance was measured by sensitivity, specificity, and balanced accuracy:</p>
        <p>
          <disp-formula>Sensitivity = TP/(TP + FN),</disp-formula>
        </p>
        <p>where TP and FN are the number of true-positive and false-negative predictions, respectively,</p>
        <p>
          <disp-formula>Specificity = TN/(TN + FP),</disp-formula>
        </p>
        <p>where TN and FP are the number of true-negative and false-positive predictions, respectively, and</p>
        <p>
          <disp-formula>Balanced accuracy = (Sensitivity + Specificity)/2.</disp-formula>
        </p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Number of Distinct Clusters</title>
        <p>To address our first research question regarding the number of distinct clusters, we used time series cluster analysis of the scaled 7-day moving average of daily deaths due to COVID-19. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the scaled time series of the daily deaths due to COVID-19 for 9 randomly selected contiguous counties in the United States during the study period. We evaluated 2≤<italic>k</italic>≤51 time series cluster solutions using 23 cluster validity indices [<xref ref-type="bibr" rid="ref17">17</xref>]. Of the 23 validity indices, 7 (30.4%) preferred a 4-cluster solution. The second-most preferred cluster solution was a 2-cluster solution, which was preferred by 6 (26.1%) of the 23 indices. Using a majority rule of the validity indices, we retained a 4-cluster solution.</p>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the geographic distribution of the 4-cluster solution across the United States. Cluster C1 is primarily concentrated in the Upper Midwest and mountain states, as well as in Ohio, Central Kentucky, Virginia, and Maine. Cluster C2 is located along the coast in the Northeast and in some of the larger US cities, such as Chicago, Detroit, Seattle, and New Orleans. Cluster C3 is scattered throughout much of the United States but particularly in Missouri, Illinois, and the states surrounding the Great Lakes. Cluster C4 occurs across the United States but shows concentrations in California, East Texas, the Southwest, and the Southeast. For an interactive color version of this map, please see Section 3.3.3 in Megahed et al [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p><xref rid="figure5" ref-type="fig">Figure 5</xref> shows the 25<sup>th</sup>, 50<sup>th</sup>, and 75<sup>th</sup> percentiles of the time series profiles for the counties within each cluster and provides insight into the shape of the cluster patterns. From <xref rid="figure5" ref-type="fig">Figure 5</xref>, it is clear that counties in cluster C1 experienced a low number of deaths due to COVID-19 throughout the study period. Counties clustering in C2 experienced early death counts beginning in April 2020, but the death counts tapered off in early summer. These counties maintained low death counts throughout the late summer and early fall, until rising again in November 2020. In C3, counties experienced few COVID-19 deaths until October 2020, when they saw a rapid rise in deaths. The death counts in C3 began dropping in December 2020, which continued through March 2021. The fourth cluster, C4, showed a small increase in deaths in late summer, followed by a steady rise throughout the fall and a higher peak in early 2021.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Time series profiles of the scaled 7-day moving average of new COVID-19 deaths for 9 sample counties.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i7e32164_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Map of 4 scaled time series profile clusters of COVID-19 deaths by county in contiguous US counties.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i7e32164_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>A summary plot, where the median scaled time series profile for each cluster is depicted using the solid bold line. The first and third quartiles are shown by dotted and 2-dash lines, respectively.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i7e32164_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Explaining the Clusters</title>
        <p>To address the second research question regarding factors that relate to the patterns of COVID-19–related deaths, we used an explanatory multinomial regression analysis to validate our cluster solution. <xref ref-type="table" rid="table2">Table 2</xref> provides a summary of the explanatory study variables for each cluster.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> gives the coefficients from the multinomial logistic regression analysis. The dependent variable was cluster. The baseline category for the analysis was C1, the cluster of counties with few deaths related to COVID-19. The coefficients showed the linear change in the natural log of the odds ratio (OR) of a county being classified in a corresponding cluster (eg, C2, C3, or C4) versus the baseline cluster (C1). From <xref ref-type="table" rid="table3">Table 3</xref>, it is clear that several geographic, political, government, and social vulnerability variables are associated with the patterns in COVID-19–related deaths.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>A summary of how the predictor variables were distributed per cluster. For each numeric variable, we report the mean (SD). For categorical variables, we report the number and percentage of counties within each of the 4 clusters that fall in each subcategory. Within a cluster, the percentages summed over the subcategories of a variable may deviate slightly from 100% due to rounding errors.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="0"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Variables</td>
                <td colspan="2">C1 (N=1261)</td>
                <td colspan="2">C2 (N=226)</td>
                <td colspan="2">C3 (N=827)</td>
                <td colspan="2">C4<sup>a</sup> (N=794)</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11">
                  <bold>Continuous variables, mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Theme 1: socioeconomic</td>
                <td>0.48 (0.30)</td>
                <td colspan="2">0.44 (0.31)</td>
                <td colspan="2">0.45 (0.27)</td>
                <td colspan="2">0.61 (0.26)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Theme 2: household composition and disability</td>
                <td>0.50 (0.28)</td>
                <td colspan="2">0.37 (0.31)</td>
                <td colspan="2">0.49 (0.28)</td>
                <td colspan="2">0.56 (0.29)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Theme 3: minority status and language</td>
                <td>0.41 (0.28)</td>
                <td colspan="2">0.71 (0.22)</td>
                <td colspan="2">0.43 (0.27)</td>
                <td colspan="2">0.65 (0.24)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Theme 4: housing and transportation</td>
                <td>0.42 (0.29)</td>
                <td colspan="2">0.60 (0.28)</td>
                <td colspan="2">0.49 (0.26)</td>
                <td colspan="2">0.60 (0.27)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Log(population density)</td>
                <td>3.01 (1.71)</td>
                <td colspan="2">5.86 (1.81)</td>
                <td colspan="2">3.73 (1.31)</td>
                <td colspan="2">4.60 (1.29)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Government response index median</td>
                <td>47.09 (8.45)</td>
                <td colspan="2">52.87 (9.13)</td>
                <td colspan="2">47.24 (8.25)</td>
                <td colspan="2">48.13 (7.65)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Categorical variables, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Governor’s party (Democratic)</td>
                <td>579 (45.9)</td>
                <td colspan="2">142 (62.8)</td>
                <td colspan="2">428 (51.8)</td>
                <td colspan="2">202 (25.4)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Governor’s party (Republican)</td>
                <td>682 (54.1)</td>
                <td colspan="2">84 (37.2)</td>
                <td colspan="2">399 (48.2)</td>
                <td colspan="2">591 (74.4)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region A</td>
                <td>41 (3.3)</td>
                <td colspan="2">43 (19.0)</td>
                <td colspan="2">21 (2.5)</td>
                <td colspan="2">24 (3.0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region B</td>
                <td>131 (10.4)</td>
                <td colspan="2">63 (27.9)</td>
                <td colspan="2">62 (7.5)</td>
                <td colspan="2">48 (6.0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region C</td>
                <td>101 (8.0)</td>
                <td colspan="2">19 (8.4)</td>
                <td colspan="2">13 (1.6)</td>
                <td colspan="2">239 (30.1)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region D</td>
                <td>140 (11.1)</td>
                <td colspan="2">20 (8.8)</td>
                <td colspan="2">51 (6.2)</td>
                <td colspan="2">153 (19.3)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region E</td>
                <td>188 (14.9)</td>
                <td colspan="2">30 (13.3)</td>
                <td colspan="2">283 (34.2)</td>
                <td colspan="2">23 (2.9)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region F</td>
                <td>154 (12.2)</td>
                <td colspan="2">31 (13.7)</td>
                <td colspan="2">116 (14.0)</td>
                <td colspan="2">201 (25.3)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region G</td>
                <td>236 (18.7)</td>
                <td colspan="2">7 (3.1)</td>
                <td colspan="2">144 (17.4)</td>
                <td colspan="2">25 (3.1)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region H</td>
                <td>187 (14.8)</td>
                <td colspan="2">7 (3.1)</td>
                <td colspan="2">88 (10.6)</td>
                <td colspan="2">9 (1.1)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region I</td>
                <td>22 (1.7)</td>
                <td colspan="2">1 (0.4)</td>
                <td colspan="2">14 (1.7)</td>
                <td colspan="2">53 (6.7)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Region J</td>
                <td>61 (4.8)</td>
                <td colspan="2">5 (2.2)</td>
                <td colspan="2">35 (4.2)</td>
                <td colspan="2">18 (2.3)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Rio Arriba County, New Mexico, assigned to C4 based on the time series clustering, was not modeled using the multinomial logistic regression, since we could not obtain values for its predictor variables. Hence, the reported means (SDs) and n (%) for C4 exclude this county.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Results of multinomial logistic regression for clusters C2, C3, and C4. We used C1 as the reference cluster since it contained the largest number of counties.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="0"/>
            <col width="130"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Variables</td>
                <td colspan="2">C2</td>
                <td colspan="3">C3</td>
                <td colspan="2">C4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>β (SE)</td>
                <td>OR<sup>a</sup> (95% CI)</td>
                <td>β (SE)</td>
                <td>OR (95% CI)</td>
                <td colspan="2">β (SE)</td>
                <td>OR (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Theme 1: socioeconomic</td>
                <td>0.419 (0.592)</td>
                <td>1.52 (0.48-4.85)</td>
                <td>–0.356 (0.286)</td>
                <td>0.70 (0.40-1.23)</td>
                <td colspan="2">–0.018 (0.376)</td>
                <td>0.98 (0.47-2.05)</td>
              </tr>
              <tr valign="top">
                <td>Theme 2: household composition and disability</td>
                <td>–0.245 (0.432)</td>
                <td>0.78 (0.34-1.83)</td>
                <td>0.392 (0.223)</td>
                <td>1.48 (0.96-2.29)</td>
                <td colspan="2">0.638 (0.267)</td>
                <td>1.89 (1.12-3.19)</td>
              </tr>
              <tr valign="top">
                <td>Theme 3: minority status and language</td>
                <td>3.661 (0.469)</td>
                <td>38.90 (15.51-97.54)</td>
                <td>0.004 (0.222)</td>
                <td>1.00 (0.65-1.55)</td>
                <td colspan="2">1.162 (0.268)</td>
                <td>3.20 (1.89-5.40)</td>
              </tr>
              <tr valign="top">
                <td>Theme 4: housing and transportation</td>
                <td>0.557 (0.428)</td>
                <td>1.75 (0.75-4.04)</td>
                <td>1.086 (0.227)</td>
                <td>2.96 (1.90-4.62)</td>
                <td colspan="2">0.599 (0.270)</td>
                <td>1.82 (1.07-3.09)</td>
              </tr>
              <tr valign="top">
                <td>Log(population density)</td>
                <td>1.009 (0.078)</td>
                <td>2.74 (2.35-3.20)</td>
                <td>0.417 (0.043)</td>
                <td>1.52 (1.39-1.65)</td>
                <td colspan="2">0.959 (0.057)</td>
                <td>2.61 (2.33-2.92)</td>
              </tr>
              <tr valign="top">
                <td>Governor’s party (Republican)</td>
                <td>–0.101 (0.233)</td>
                <td>0.90 (0.57-1.43)</td>
                <td>–0.323 (0.122)</td>
                <td>0.72 (0.57-0.92)</td>
                <td colspan="2">1.093 (0.173)</td>
                <td>2.98 (2.13-4.19)</td>
              </tr>
              <tr valign="top">
                <td>Region B</td>
                <td>–1.879 (0.464)</td>
                <td>0.15 (0.06-0.38)</td>
                <td>–0.509 (0.354)</td>
                <td>0.60 (0.30-1.20)</td>
                <td colspan="2">–1.108 (0.395)</td>
                <td>0.33 (0.15-0.72)</td>
              </tr>
              <tr valign="top">
                <td>Region C</td>
                <td>–2.621 (0.496)</td>
                <td>0.07 (0.03-0.19)</td>
                <td>–1.673 (0.437)</td>
                <td>0.19 (0.08-0.44)</td>
                <td colspan="2">0.502 (0.376)</td>
                <td>1.65 (0.79-3.45)</td>
              </tr>
              <tr valign="top">
                <td>Region D</td>
                <td>–1.717 (0.537)</td>
                <td>0.18 (0.06-0.51)</td>
                <td>–0.574 (0.369)</td>
                <td>0.56 (0.27-1.16)</td>
                <td colspan="2">0.242 (0.401)</td>
                <td>1.27 (0.58-2.80)</td>
              </tr>
              <tr valign="top">
                <td>Region E</td>
                <td>–1.941 (0.461)</td>
                <td>0.14 (0.06-0.35)</td>
                <td>0.884 (0.324)</td>
                <td>2.42 (1.28-4.57)</td>
                <td colspan="2">–1.925 (0.403)</td>
                <td>0.15 (0.07-0.32)</td>
              </tr>
              <tr valign="top">
                <td>Region F</td>
                <td>–1.520 (0.522)</td>
                <td>0.22 (0.08-0.61)</td>
                <td>0.629 (0.367)</td>
                <td>1.88 (0.91-3.85)</td>
                <td colspan="2">0.814 (0.444)</td>
                <td>2.26 (0.95-5.39)</td>
              </tr>
              <tr valign="top">
                <td>Region G</td>
                <td>–2.886 (0.647)</td>
                <td>0.06 (0.02-0.20)</td>
                <td>0.363 (0.361)</td>
                <td>1.44 (0.71-2.92)</td>
                <td colspan="2">–1.536 (0.444)</td>
                <td>0.22 (0.09-0.51)</td>
              </tr>
              <tr valign="top">
                <td>Region H</td>
                <td>–2.221 (0.681)</td>
                <td>0.11 (0.03-0.41)</td>
                <td>0.374 (0.396)</td>
                <td>1.45 (0.67-3.16)</td>
                <td colspan="2">–1.329 (0.570)</td>
                <td>0.26 (0.09-0.81)</td>
              </tr>
              <tr valign="top">
                <td>Region I</td>
                <td>–3.509 (1.117)</td>
                <td>0.03 (0.00-0.27)</td>
                <td>0.657 (0.479)</td>
                <td>1.93 (0.75-4.93)</td>
                <td colspan="2">2.139 (0.476)</td>
                <td>8.49 (3.34-21.58)</td>
              </tr>
              <tr valign="top">
                <td>Region J</td>
                <td>–2.527 (0.666)</td>
                <td>0.08 (0.02-0.29)</td>
                <td>0.228 (0.396)</td>
                <td>1.26 (0.58-2.73)</td>
                <td colspan="2">–0.213 (0.480)</td>
                <td>0.81 (0.32-2.07)</td>
              </tr>
              <tr valign="top">
                <td>Government response</td>
                <td>–0.028 (0.018)</td>
                <td>0.97 (0.94-1.01)</td>
                <td>–0.030 (0.009)</td>
                <td>0.97 (0.95-0.99)</td>
                <td colspan="2">–0.020 (0.012)</td>
                <td>0.98 (0.96-1.00)</td>
              </tr>
              <tr valign="top">
                <td>Constant</td>
                <td>–5.171 (1.292)</td>
                <td>0.01 (0.00-0.07)</td>
                <td>–1.308 (0.684)</td>
                <td>0.35 (0.09-1.35)</td>
                <td colspan="2">–5.115 (0.934)</td>
                <td>0.01 (0.00-0.04)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>OR: odds ratio.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We found that the clusters can be roughly described as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>C1: low death rates throughout much of the pandemic; found mostly in Upper Midwest and mountain states</p>
          </list-item>
          <list-item>
            <p>C2: high death rates in spring 2020, with another spike in December 2020/January 2021; found mostly in the Northeast and other large cities</p>
          </list-item>
          <list-item>
            <p>C3: low death rates until fall 2020, followed by a peak in December 2020; spread throughout the United States with concentrations in Central Midwest and Great Lakes</p>
          </list-item>
          <list-item>
            <p>C4: steady death rates from late summer through December 2020, followed by a peak in January; spread throughout the United States with concentrations in California, the Southwest, and the Southeast</p>
          </list-item>
        </list>
        <p>“SVI theme 3: minority status and language” was significantly associated with clustering in C2 versus C1, yielding an OR of 38.90. Counties with high levels of SVI theme 3 were strongly associated with membership in C2 compared to C1. All CDC regions (B-J) showed a significant, negative association with C2 versus C1, indicating that being located outside region A (the Northeast, baseline category for region) is associated with lower odds of clustering in C2 versus C1. This is consistent with our initial finding from the map in <xref rid="figure4" ref-type="fig">Figure 4</xref>, which showed that the counties in C2 are primarily located in the Northeast.</p>
        <p>The variable with the strongest positive association to C3, relative to C1, was “SVI theme 4: housing and transportation.” Population density was also significant and positively related to C3. The governor's party was significant and negatively associated with C3, indicating that counties in states with Republican governors are associated with lower odds of clustering in C3 than in C1. The government response was also significant and negatively related to membership in C3, but the effect was small. Among the regions, the coefficient for region C (North Carolina, South Carolina, Georgia, and Florida) was significant and negative; thus, counties in these states are associated with lower odds of being classified in C3 than in C1. In contrast, the coefficient for region E was significant and positive, which suggests that counties in Minnesota, Wisconsin, Illinois, Indiana, Michigan, and Ohio are associated with higher odds of clustering in C3.</p>
        <p>“SVI theme 1: socioeconomic” was not significant for membership in any of clusters C2-C4; however, 3 of the SVIs (household composition and disability, minority status and language, and housing and transportation) were significant and positively associated with membership in C4. In addition, counties located in states with Republican governors were also associated with higher odds of classification in C4 relative to C1. Among the CDC regions, regions I (California, Nevada, and Arizona) and F (New Mexico, Texas, Oklahoma, and Louisiana) had positive coefficients. Regions B, E, G, and H had significantly negative coefficients. The logarithm of population density was also a significant predictor for classification in C2, C3, and C4, relative to C1, which indicates that a low population density is associated with clustering in C1.</p>
        <p>Overall, the multinomial regression model correctly classified 1904 (61.25%) of the 3108 counties into 1 of 4 clusters. <xref ref-type="table" rid="table4">Table 4</xref> gives the in-sample predictive performance of the multinomial regression model broken down by cluster. The balanced accuracy was similar for all 4 clusters, ranging from 0.63 to 0.80. A more nuanced view of the performance can be seen from sensitivity and specificity. The model performed well in correctly classifying counties in cluster C4 (sensitivity=0.74), which shows a sustained emergence in deaths beginning in late summer 2020. The model also performed well in classifying counties in cluster C1 (sensitivity=0.71), counties with few deaths. However, it had only moderate ability to correctly classify counties into clusters C2 and C3 (sensitivity=0.42 and 0.39, respectively). Note that the sensitivity performance for clusters C2 and C3 exceeded the expected sensitivity of 0.25 that would be obtained from random allocation among 4 classes in a balanced or imbalanced multiclass classification problem (see Megahed et al [<xref ref-type="bibr" rid="ref21">21</xref>] for more details). In terms of specificity, the model performed well at identifying which counties are not in clusters C1-C4, with specificity values ranging from 0.71 to 0.98. <xref rid="figure6" ref-type="fig">Figure 6</xref> shows the distribution of the accuracy of the multinomial logistic model in predicting cluster membership. Counties that were correctly predicted from the model are indicated in a light color, while those that were incorrectly predicted are indicated in a dark color. The model provides some insight into the patterns across the United States, but additional data are needed to more accurately classify counties in terms of the pattern of death rates due to COVID-19. For an interactive version of this map, please see Section 4.2.4 in Megahed et al [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The predictive performance of the multinomial regression model for each cluster.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Cluster</td>
                <td>Balanced accuracy</td>
                <td>Sensitivity</td>
                <td>Specificity</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>C1</td>
                <td>0.71</td>
                <td>0.71</td>
                <td>0.71</td>
              </tr>
              <tr valign="top">
                <td>C2</td>
                <td>0.70</td>
                <td>0.42</td>
                <td>0.98</td>
              </tr>
              <tr valign="top">
                <td>C3</td>
                <td>0.63</td>
                <td>0.39</td>
                <td>0.88</td>
              </tr>
              <tr valign="top">
                <td>C4</td>
                <td>0.80</td>
                <td>0.74</td>
                <td>0.86</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Map of the prediction accuracy of the multinomial logistic model describing the time series cluster solution. Counties in a light color (labeled “Yes”) were correctly classified by the model. Counties in a dark color (labeled “No”) were incorrectly classified. Rio Arriba County, New Mexico (in white), was not classified due to missing data.</p>
          </caption>
          <graphic xlink:href="publichealth_v8i7e32164_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This research provides a framework for understanding the pattern of COVID-19–related deaths across the United States. Using time series clustering with county-level data on the occurrence of COVID-19–related deaths, we observed 4 distinct patterns from March 1, 2020, to February 27, 2021. The second stage of our analysis revealed that these patterns can be partially explained by region as well as social and political predictors.</p>
        <p>Our findings add to the literature on the relationship between COVID-19 outcomes and vulnerable populations [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. The largest number of counties in the United States experienced few deaths during the study period (cluster C1). These counties were, on average, at or below the median of all measures of social vulnerability. With lower population densities, and spread throughout the United States, C1 counties served as our model baseline.</p>
        <p>The county-level COVID-19 death data were extracted using the COVID19 R package [<xref ref-type="bibr" rid="ref1">1</xref>], which extracted confirmed deaths from a GitHub repository [<xref ref-type="bibr" rid="ref25">25</xref>]. The cross-sectional data set containing the predictors used in the multinomial regression was compiled by the authors from disparate sources and is available in Megahed [<xref ref-type="bibr" rid="ref26">26</xref>]. R statistical software version 4.0.4 was used for all processing and analysis of data. A reproducible workflow of our analysis is made available using R Markdown and is hosted in Megahed et al [<xref ref-type="bibr" rid="ref20">20</xref>], following the best practices of Jalali et al [<xref ref-type="bibr" rid="ref27">27</xref>] in reporting and documenting analyses for COVID-19.</p>
        <p>Cluster C3 (low death rates until fall 2020, peaking in December 2020) had the second-largest number of counties. C3 counties are spread across much of the country but have concentrations in the Great Lakes and Central Midwest regions. Interestingly, few incidences of C3 occur in the Southeastern United States and along the eastern seaboard from Washington, DC, to Massachusetts. Like C1, counties in C3 had SVI measures below the median, on average. These counties experienced a single late wave in COVID-19 deaths beginning in late October 2020 that declined by the end of the study period. There were a few distinguishing features between counties being classified in C3 versus C1: a higher population density, Democratic state leadership, location outside the Southeast, location in the Great Lakes region, and higher vulnerability in the SVI housing and transportation theme. This index indicates a higher incidence of multiunit housing, mobile homes, crowding, lack of vehicles, or group living situations.</p>
        <p>The 226 counties that are clustered in C2 (high death rates in spring 2020 and December 2020/January 2021) are mostly populous counties in the Northeast, Washington, southeast Louisiana (including New Orleans), and the Four Corners region of Arizona and New Mexico. C2 counties experienced an early outbreak of deaths, followed by a second wave beginning in November 2020 but few deaths in summer 2020. These counties showed a strong relationship with the SVI minority and language theme, indicating a large percentage of residents who are minority or nonnative English speakers.</p>
        <p>Cluster C4 (steady death rates beginning late summer, peaking in January) is located throughout the United States, with concentrations in the Southeast and Southwest. The counties in C4 showed a steady incidence of deaths beginning in late summer 2020 that continued through the study period. C4 counties were, on average, above the median on all SVI themes, and 3 of the 4 themes were significant in classifying counties in C4 versus C1. Specifically, the themes related to household and disability, minority and language, and housing and transportation all showed a positive association with this sustained pattern of COVID-19–related deaths. The majority (n=591, 74.4%) of these counties are located in Republican-led states.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The local patterns in COVID-19–related deaths suggest that local-level factors, including geographic, demographic, and social vulnerability characteristics, are related to adverse outcomes from COVID-19. There are several limitations to this research. These include the observational nature of the study, which was conducted while the pandemic was still unfolding. The retrospective, secondary use of data makes it impossible to infer causation from our model. Outbreaks and adverse outcomes changed over time as local and national governments adopted new policies and introduced vaccines in response to the emerging pandemic. Further, the government response index is available only at the state level and is constant across all counties within a state. Using a state-level predictor to explain cluster membership at the county level could lead to an ecological fallacy.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Despite limitations, this exploratory study revealed new insights into the most severe outcome of the COVID-19 pandemic. The identification of 4 distinct patterns of death incidence across 3108 US counties provides evidence of differences in how severe outcomes from the pandemic were realized. The United States is a demographically and politically diverse nation, and it is important to understand the differences in pandemic-related outcomes across communities. By examining the relationship between county-level predictors and membership in the 4 cluster patterns, we showed that there are important demographic, political, and socioeconomic differences related to death patterns across the United States.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">OR</term>
          <def>
            <p>odds ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SVI</term>
          <def>
            <p>social vulnerability index</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Our data acquisition and computations were supported in part by the Ohio Supercomputer Center (Grant PZS1007).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guidotti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ardia</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 data hub</article-title>
          <source>JOSS</source>
          <year>2020</year>
          <month>07</month>
          <volume>5</volume>
          <issue>51</issue>
          <fpage>2376</fpage>
          <pub-id pub-id-type="doi">10.21105/joss.02376</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hamid</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Ashraf</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>United States county-level COVID-19 death rates and case fatality rates vary by region and urban status</article-title>
          <source>Healthcare (Basel)</source>
          <year>2020</year>
          <month>09</month>
          <day>09</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>330</fpage>
          <pub-id pub-id-type="doi">10.3390/healthcare8030330</pub-id>
          <pub-id pub-id-type="medline">32917009</pub-id>
          <pub-id pub-id-type="pii">healthcare8030330</pub-id>
          <pub-id pub-id-type="pmcid">PMC7551952</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gollwitzer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Brady</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pärnamets</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Freedman</surname>
              <given-names>IG</given-names>
            </name>
            <name name-style="western">
              <surname>Knowles</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Van Bavel</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>Partisan differences in physical distancing are linked to health outcomes during the COVID-19 pandemic</article-title>
          <source>Nat Hum Behav</source>
          <year>2020</year>
          <month>11</month>
          <volume>4</volume>
          <issue>11</issue>
          <fpage>1186</fpage>
          <lpage>1197</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-020-00977-7</pub-id>
          <pub-id pub-id-type="medline">33139897</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-020-00977-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baccini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brodeur</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Explaining governors’ response to the COVID-19 pandemic in the United States</article-title>
          <source>Am Polit Res</source>
          <year>2020</year>
          <month>12</month>
          <day>01</day>
          <volume>49</volume>
          <issue>2</issue>
          <fpage>215</fpage>
          <lpage>220</lpage>
          <pub-id pub-id-type="doi">10.1177/1532673x20973453</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Khetpal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liauw</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Izurieta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ortiz</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Impact of Government-Imposed Social Distancing Measures on COVID-19 Morbidity and Mortality around the World</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.who.int/bulletin/online_first/20-262659.pdf">http://www.who.int/bulletin/online_first/20-262659.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Megahed</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Allison</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Rigdon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A retrospective cluster analysis of COVID-19 cases by county</article-title>
          <source>bioRxiv</source>
          <comment>Preprint posted online November 12, 2020</comment>
          <pub-id pub-id-type="doi">10.1101/2020.11.12.379537</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <source>Covid-19: The global crisis — in Data</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ig.ft.com/coronavirus-global-data/">https://ig.ft.com/coronavirus-global-data/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention (CDC)</collab>
          </person-group>
          <source>National Center for Chronic Disease Prevention and Health Promotion Regions</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/coordinatedchronic/docs/nccdphp-regions-map.pdf">https://www.cdc.gov/coordinatedchronic/docs/nccdphp-regions-map.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hale</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Atav</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hallas</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kira</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Petherick</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pott</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Variation in US states responses to COVID-19</article-title>
          <source>BSG Working Paper Series</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bsg.ox.ac.uk/sites/default/files/2020-08/BSG-WP-2020-034.pdf">https://www.bsg.ox.ac.uk/sites/default/files/2020-08/BSG-WP-2020-034.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <source>Oxford COVID-19 Government Tracker Methodology</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/OxCGRT/covid-policy-tracker/blob/master/documentation/index_methodology.md">https://github.com/OxCGRT/covid-policy-tracker/blob/master/documentation/index_methodology.md</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Agency for Toxic Substances and Disease Registry</collab>
          </person-group>
          <source>CDC Social Vulnerability Index</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.atsdr.cdc.gov/placeandhealth/svi/index.html">https://www.atsdr.cdc.gov/placeandhealth/svi/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Flanagan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gregory</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hallisey</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>A Social Vulnerability Index for Disaster Management</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.degruyter.com/document/doi/10.2202/1547-7355.1792/html">https://www.degruyter.com/document/doi/10.2202/1547-7355.1792/html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warren Liao</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Clustering of time series data—a survey</article-title>
          <source>Pattern Recognit</source>
          <year>2005</year>
          <month>11</month>
          <volume>38</volume>
          <issue>11</issue>
          <fpage>1857</fpage>
          <lpage>1874</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patcog.2005.01.025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Adaptive Control Processes: A Guided Tour, Volume 2045 in the series Princeton Legacy Library</source>
          <year>2015</year>
          <publisher-loc>Princeton, NJ</publisher-loc>
          <publisher-name>Princeton University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aghabozorgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Seyed Shirkhorshidi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ying Wah</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Time-series clustering – a decade review</article-title>
          <source>Inf Syst</source>
          <year>2015</year>
          <month>10</month>
          <volume>53</volume>
          <fpage>16</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1016/j.is.2015.04.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charrad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghazzali</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Boiteau</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Niknafs</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>NbClust: an R package for determining the relevant number of clusters in a data set</article-title>
          <source>J Stat Softw</source>
          <year>2014</year>
          <volume>61</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v061.i06</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charrad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghazzali</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Boiteau</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Niknafs</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Package ‘NbClust’</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/NbClust/NbClust.pdf">https://cran.r-project.org/web/packages/NbClust/NbClust.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hosmer</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Lemeshow</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sturdivant</surname>
              <given-names>RX</given-names>
            </name>
          </person-group>
          <source>Applied Logistic Regression, 3rd Ed</source>
          <year>2013</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &amp; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ripley</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Venables</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <source>Package ‘nnet’</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/nnet/nnet.pdf">https://cran.r-project.org/web/packages/nnet/nnet.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Megahed</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Jones-Farmer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rigdon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>A Reproducible GitHub Page for a Two-Stage Modeling Framework for Analyzing COVID-19 Deaths by County</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmegahed.github.io/covid_deaths.html">https://fmegahed.github.io/covid_deaths.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Megahed</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Jones-Farmer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rigdon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>A Numerical Study to Examine the Obtained Sensitivity for Arbitrary and Proportional Guessing Scenarios for a 4-Class Classification Problem</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmegahed.github.io/covid_deaths.html">https://fmegahed.github.io/covid_deaths.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Krieger</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Revealing the unequal burden of COVID-19 by income, race/ethnicity, and household crowding: US county versus zip code analyses</article-title>
          <source>J Public Health Manag Pract</source>
          <year>2021</year>
          <volume>27 Suppl 1, COVID-19 and Public Health: Looking Back, Moving Forward</volume>
          <fpage>S43</fpage>
          <lpage>S56</lpage>
          <pub-id pub-id-type="doi">10.1097/PHH.0000000000001263</pub-id>
          <pub-id pub-id-type="medline">32956299</pub-id>
          <pub-id pub-id-type="pii">00124784-202101001-00008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Elo</surname>
              <given-names>IT</given-names>
            </name>
            <name name-style="western">
              <surname>Hempstead</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bor</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Preston</surname>
              <given-names>SH</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 and excess mortality in the United States: a county-level analysis</article-title>
          <source>PLoS Med</source>
          <year>2021</year>
          <month>05</month>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>e1003571</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1003571"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1003571</pub-id>
          <pub-id pub-id-type="medline">34014945</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-20-04239</pub-id>
          <pub-id pub-id-type="pmcid">PMC8136644</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khanijahani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Racial, ethnic, and socioeconomic disparities in confirmed COVID-19 cases and deaths in the United States: a county-level analysis as of November 2020</article-title>
          <source>Ethn Health</source>
          <year>2021</year>
          <month>01</month>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>22</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1080/13557858.2020.1853067</pub-id>
          <pub-id pub-id-type="medline">33334160</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/CSSEGISandData/COVID-19">https://github.com/CSSEGISandData/COVID-19</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Megahed</surname>
              <given-names>FM</given-names>
            </name>
          </person-group>
          <source>Covid-19 Deaths</source>
          <access-date>2022-06-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/fmegahed/covid19-deaths/tree/master/Data/Output">https://github.com/fmegahed/covid19-deaths/tree/master/Data/Output</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jalali</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>DiGennaro</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sridhar</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Transparency assessment of COVID-19 models</article-title>
          <source>Lancet Global Health</source>
          <year>2020</year>
          <month>12</month>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e1459</fpage>
          <lpage>e1460</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2214-109X(20)30447-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2214-109X(20)30447-2</pub-id>
          <pub-id pub-id-type="medline">33125915</pub-id>
          <pub-id pub-id-type="pii">S2214-109X(20)30447-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7833180</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
