<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i1e44467</article-id>
      <article-id pub-id-type="pmid">37436799</article-id>
      <article-id pub-id-type="doi">10.2196/44467</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Pediatric Injury Surveillance From Uncoded Emergency Department Admission Records in Italy: Machine Learning–Based Text-Mining Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Sanchez</surname>
            <given-names>Travis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jiwani</surname>
            <given-names>Nasmin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Raiden</surname>
            <given-names>Silvina</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Azzolina</surname>
            <given-names>Danila</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8185-5742</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Bressan</surname>
            <given-names>Silvia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6736-5392</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lorenzoni</surname>
            <given-names>Giulia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1771-4686</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Baldan</surname>
            <given-names>Giulia Andrea</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-4365-4065</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Bartolotta</surname>
            <given-names>Patrizia</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9779-8298</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Scognamiglio</surname>
            <given-names>Federico</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4610-0950</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Francavilla</surname>
            <given-names>Andrea</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1980-0871</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Lanera</surname>
            <given-names>Corrado</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0520-7428</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Da Dalt</surname>
            <given-names>Liviana</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2977-3907</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Gregori</surname>
            <given-names>Dario</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Unit of Biostatistics, Epidemiology and Public Health</institution>
            <institution>Department of Cardiac, Thoracic, Vascular Sciences, and Public Health</institution>
            <institution>University of Padova</institution>
            <addr-line>Via Leonardo Loredan 18</addr-line>
            <addr-line>Padua, 35128</addr-line>
            <country>Italy</country>
            <phone>39 049 8275384</phone>
            <email>dario.gregori@unipd.it</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7906-0580</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Environmental and Preventive Sciences</institution>
        <institution>University of Ferrara</institution>
        <addr-line>Ferrara</addr-line>
        <country>Italy</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Women's and Children's Health</institution>
        <institution>University of Padova</institution>
        <addr-line>Padua</addr-line>
        <country>Italy</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Unit of Biostatistics, Epidemiology and Public Health</institution>
        <institution>Department of Cardiac, Thoracic, Vascular Sciences, and Public Health</institution>
        <institution>University of Padova</institution>
        <addr-line>Padua</addr-line>
        <country>Italy</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Dario Gregori <email>dario.gregori@unipd.it</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>12</day>
        <month>7</month>
        <year>2023</year>
      </pub-date>
      <volume>9</volume>
      <elocation-id>e44467</elocation-id>
      <history>
        <date date-type="received">
          <day>20</day>
          <month>11</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>14</day>
          <month>2</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>23</day>
          <month>3</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Danila Azzolina, Silvia Bressan, Giulia Lorenzoni, Giulia Andrea Baldan, Patrizia Bartolotta, Federico Scognamiglio, Andrea Francavilla, Corrado Lanera, Liviana Da Dalt, Dario Gregori. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 12.07.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2023/1/e44467" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Unintentional injury is the leading cause of death in young children. Emergency department (ED) diagnoses are a useful source of information for injury epidemiological surveillance purposes. However, ED data collection systems often use free-text fields to report patient diagnoses. Machine learning techniques (MLTs) are powerful tools for automatic text classification. The MLT system is useful to improve injury surveillance by speeding up the manual free-text coding tasks of ED diagnoses.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This research aims to develop a tool for automatic free-text classification of ED diagnoses to automatically identify injury cases. The automatic classification system also serves for epidemiological purposes to identify the burden of pediatric injuries in Padua, a large province in the Veneto region in Northeast Italy.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The study includes 283,468 pediatric admissions between 2007 and 2018 to the Padova University Hospital ED, a large referral center in Northern Italy. Each record reports a diagnosis by free text. The records are standard tools for reporting patient diagnoses. An expert pediatrician manually classified a randomly extracted sample of approximately 40,000 diagnoses. This study sample served as the gold standard to train an MLT classifier. After preprocessing, a document-term matrix was created. The machine learning classifiers, including decision tree, random forest, gradient boosting method (GBM), and support vector machine (SVM), were tuned by 4-fold cross-validation. The injury diagnoses were classified into 3 hierarchical classification tasks, as follows: injury versus noninjury (task A), intentional versus unintentional injury (task B), and type of unintentional injury (task C), according to the World Health Organization classification of injuries.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The SVM classifier achieved the highest performance accuracy (94.14%) in classifying injury versus noninjury cases (task A). The GBM method produced the best results (92% accuracy) for the unintentional and intentional injury classification task (task B). The highest accuracy for the unintentional injury subclassification (task C) was achieved by the SVM classifier. The SVM, random forest, and GBM algorithms performed similarly against the gold standard across different tasks.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study shows that MLTs are promising techniques for improving epidemiological surveillance, allowing for the automatic classification of pediatric ED free-text diagnoses. The MLTs revealed a suitable classification performance, especially for general injuries and intentional injury classification. This automatic classification could facilitate the epidemiological surveillance of pediatric injuries by also reducing the health professionals’ efforts in manually classifying diagnoses for research purposes.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>pediatrics</kwd>
        <kwd>child and adolescent health</kwd>
        <kwd>text mining</kwd>
        <kwd>injury</kwd>
        <kwd>death</kwd>
        <kwd>surveillance</kwd>
        <kwd>pediatric admission</kwd>
        <kwd>hospitalization</kwd>
        <kwd>patient record</kwd>
        <kwd>unintentional injury</kwd>
        <kwd>emergency department</kwd>
        <kwd>emergency</kwd>
        <kwd>epidemiological surveillance</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Unintentional injury is the leading cause of morbidity and mortality in children worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. An injury is defined as tissue damage that occurs secondary to acute exposure to physical agents (eg, thermal, kinetic, chemical, or electrical energy or water) or chemicals (eg, poisoning). An injury can be fatal or nonfatal and can occur unintentionally or as a result of purposeful acts of harm (intentional) [<xref ref-type="bibr" rid="ref2">2</xref>]. Unintentional injuries can be prevented or controlled because they are potentially understandable and predictable [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>In 2013, 15.4% of 2.6 million unintentional injuries worldwide involved a fatal outcome for children between 1 and 14 years of age [<xref ref-type="bibr" rid="ref1">1</xref>]. In Europe, 42,000 children and adolescents aged 0-19 years died of unintentional injuries in 2004 [<xref ref-type="bibr" rid="ref4">4</xref>]. Moreover, a considerable number of children may incur some form of disability as a result of injury, often with lifelong consequences [<xref ref-type="bibr" rid="ref5">5</xref>]. Decreasing the injury burden is the main challenge for child and adolescent public health policies over the next century [<xref ref-type="bibr" rid="ref6">6</xref>]. For this reason, public health departments must pay more attention to the problem to implement prevention policies [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>Injury surveillance is made difficult by a series of logistic and structural challenges, the most important of which is the accurate coding of injury mechanisms, products involved, types of injury, and body parts involved, given that emergency department (ED) admission and discharge records are largely based on narrative free-text notes [<xref ref-type="bibr" rid="ref7">7</xref>]. Injury surveillance integrated with timely data dissemination is crucial for planning and evaluating prevention policies [<xref ref-type="bibr" rid="ref8">8</xref>] and quantifying injury burden and related risk factors [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>].</p>
      <p>In Italy and other European and newly developed countries [<xref ref-type="bibr" rid="ref11">11</xref>], narratives and free-text records are standard tools for reporting patient diagnoses. Automatic classification of such free-text information using machine learning techniques (MLTs) would be a powerful tool to improve injury surveillance [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      <p>This is true, especially for the ED, where physicians and medical personnel often face stressful situations from a clinical and management perspective [<xref ref-type="bibr" rid="ref13">13</xref>]. Within this general framework, it could be promising to provide an automated MLT-based system aimed at facilitating free-text diagnosis encoding, by also limiting an additional burden for the overwhelmed medical staff. This MLT-based system could be tailored for research and epidemiological surveillance purposes. Furthermore, this surveillance system could be promising for pediatric injury surveillance purposes because most of the incidents that occur on the ground are referred to such departments, especially large pediatric EDs [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
      <p>The literature over the past 10 years indicates an increasing interest in the automated categorization of free-text diagnoses due to the increased availability of documents in digital form [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Automatic MLT classifiers can learn (during the training phase) from a set of manually classified documents with complex free-text lexical patterns. A properly trained MLT tool can categorize a free-text record into its corresponding class. The advantages of this approach over manual methods are efficiency and saving time (in terms of expert labor) for free-text classification [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      <p>Statistical text mining methods can also be useful tools to classify electronic ED admission records and properly identify unintentional injury events [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      <p>This study represents, to our knowledge, the first effort in the literature in proposing an automatic injury classification system based on the free-text data of pediatric ED diagnoses. We propose this algorithm to facilitate injury epidemiological surveillance. The system is aimed at limiting the burden of health care professionals, who are overburdened by patient care and management tasks, in manually classifying diagnoses for epidemiological research.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Selection</title>
        <p>The study included 283,468 pediatric ED records with a filled discharge diagnosis field among 293,215 records [<xref ref-type="bibr" rid="ref16">16</xref>] from the local electronic medical record system of Padova University Hospital in Northeast Italy between 2007 and 2018 (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <p>The average ED annual workload is approximately 25,000 visits. The upper age limit to access the pediatric ED is 15 years. A higher and more variable age limit applies to children followed by the Department of Pediatrics for chronic illnesses. The Padova Hospital Pediatrics ED is characterized by high patient turnover with an average hospitalization time of 4-5 days. The number of admissions after ED access is approximately 850 per year.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study flowchart—emergency department (ED) selection and gold standard identification together with manual injury classification procedure. Machine learning technique (MLT) cross-validation and prediction procedures for tasks A, B, and C are represented by the dark grey box.</p>
          </caption>
          <graphic xlink:href="publichealth_v9i1e44467_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>The study was conducted according to the guidelines of the Declaration of Helsinki and approved by the Ethics Committee of Azienda Ospedaliera of Padova (Hospital Ethics Committee; 0022925) on April 8, 2021. The analysis is carried out on observational data of a secondary nature; however, patients signed a consent form to allow the data to be used for scientific purposes at the time of collection. The records of each patient are kept anonymous with an appropriate identification key excluding personal information. No compensation has been provided for subjects involved in the research.</p>
      </sec>
      <sec>
        <title>Learning Algorithm for Epidemiological Surveillance</title>
        <p>The definition of the free-text classification algorithm and its use for epidemiological surveillance purposes consisted of several phases, as reported in <xref rid="figure2" ref-type="fig">Figure 2</xref>. These phases are the following:</p>
        <list list-type="order">
          <list-item>
            <p>A training set was defined as a gold standard and was composed of a random sample of ED diagnoses.</p>
          </list-item>
          <list-item>
            <p>The gold standard diagnoses were manually classified by an expert physician into injury versus noninjury (task A), intentional versus unintentional injury (task B), and type of unintentional injury (task C), according to the World Health Organization (WHO) classification of injuries.</p>
          </list-item>
          <list-item>
            <p>The preprocessed and manually classified gold standard cases were used to train the MLT algorithms in classifying the diagnoses according to tasks A, B, and C. Several algorithms were considered to define the MLT tools; the best-performing algorithms would be considered for the predictive tool definition.</p>
          </list-item>
          <list-item>
            <p>The trained tool served to automatically classify the remaining ED diagnoses by providing a proof of concept of the injury epidemiology in the geographical area referring to the Padua ED center.</p>
          </list-item>
          <list-item>
            <p>Once optimal algorithms were defined, they could be used to classify the diagnoses of new pediatric patients referred to the ED, thereby defining an automated epidemiological surveillance system.</p>
          </list-item>
        </list>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flowchart of learning algorithm development for injury epidemiological surveillance. ED: emergency department; MLT: machine learning technique.</p>
          </caption>
          <graphic xlink:href="publichealth_v9i1e44467_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Gold Standard Definition</title>
        <p>A randomly extracted subset of 40,031 ED records was manually classified (<xref rid="figure1" ref-type="fig">Figure 1</xref>) by a clinician for the following 3 classification tasks:</p>
        <list list-type="order">
          <list-item>
            <p>Injury or noninjury events, characterized as classification task A.</p>
          </list-item>
          <list-item>
            <p>Intentional or unintentional injury events as classification task B (performed on diagnoses classified as injury in classification task A).</p>
          </list-item>
          <list-item>
            <p>Unintentional injury category as classification task C (performed on records classified as unintentional injury in classification task B).</p>
          </list-item>
        </list>
        <p>In this study, unintentional injury ED records were classified based on the WHO [<xref ref-type="bibr" rid="ref17">17</xref>] classification of 5 types of unintentional injuries: road traffic injuries, poisoning, falls, fires, and drowning. We added a sixth category of unintentional injury exclusive of the WHO categories called “other unintentional injury.” In addition, the WHO classes represented by fewer than 15 records were not considered for cross-validation of the automatic MLT classifier.</p>
        <p>Foreign body or choking injury events were also excluded because they constitute a separate epidemiological category conducive to purpose-specific studies [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>Free-text diagnoses were preprocessed, as reported in the literature [<xref ref-type="bibr" rid="ref19">19</xref>], by removing punctuation, stop words, white spaces, and numbers, leaving only word stems. All words were converted to lowercase.</p>
        <p>After cleaning the text corpus, the free-text data were represented via a document-term matrix (DTM) that represented the diagnostic text data in the form of a matrix. The rows of the matrix reported the sentence for the single diagnosis, and the columns of the matrix represented the single word that composed the sentence. The DTM was filled by weighting each word term with the inverse of its frequency.</p>
        <p>The most frequent words in the ED free-text records were also reported considering the unprocessed DTM in manually classified gold standard cases.</p>
        <p>Different MLT algorithms were trained and tuned to classify injury diagnoses, as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>The decision tree (DT), random forest (RF), and gradient boosting method (GBM) tree-based models.</p>
          </list-item>
          <list-item>
            <p>The support vector machine (SVM) method served as a comparison tool against the tree-based models.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>MLT Classifier Cross-Validation</title>
        <p>The filtered manual classification of 39,576 cases (<xref rid="figure1" ref-type="fig">Figure 1</xref>) served as the gold standard for training and cross-validating (4-fold) the MLT tool. In the literature, it is documented that 4- or 5-fold cross-validation is appropriate to minimize the SD of the accuracy estimate of a tuned model [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      </sec>
      <sec>
        <title>Tree-Based Methods and the Comparator</title>
        <sec>
          <title>Decision Trees (DTs)</title>
          <p>DTs are classification or regression models based on a top-down methodology in which, starting from the root node, binary splits of data are generated until a certain criterion is satisfied. The classification error rate has been considered as the fraction of training observations in a particular tree partition that do not belong to the most widely occurring class [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
          <p>The DT is a classification method that in several cases suffers from overfitting; for this reason, ensemble methods are provided in the literature. For example, bagging or bootstrap aggregation is a technique for reducing the variance of an estimated prediction function. Bagging seems to work especially well with high variance and low-bias procedures, such as trees [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        </sec>
        <sec>
          <title>Random Forest (RF)</title>
          <p>RF is a modification of the bagging method that constructs a large collection of poorly correlated trees and then calculates the average. In many problems, the performance of RF is high; RFs are also easy to train and regularize. As a result, they have become quite popular [<xref ref-type="bibr" rid="ref23">23</xref>]. The RF tree-based algorithm involves the computation of hundreds to thousands of DTs and merges them to increase the generalizability of the model. The DT combination essentially takes the form of an ensemble method. Weak learner (or single DT) pooling is used as a strategy to obtain more powerful learners [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        </sec>
        <sec>
          <title>Gradient Boosting Method (GBM)</title>
          <p>The GBM is based on sequential boosting improvements of weak classifiers (high bias and low variance). The GBM sequentially adds one classifier at a time so that the next classifier is trained to improve the previously trained DT. In contrast, the RF algorithm trains each classifier independently of the others [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        </sec>
        <sec>
          <title>Support Vector Machine (SVM) as the Comparator</title>
          <p>The main objective of the SVM algorithm is to find an optimal hyperplane of a feature space of N dimensions (where N is the number of features) that distinctly classifies the data points into a binary partition [<xref ref-type="bibr" rid="ref24">24</xref>]. Several hyperplanes may separate the resulting classes of data points. The SVM algorithm considers the hyperplanes that maximize the margin (the distance between the data points of the classes). The SVM algorithm was selected as a comparator for tree-based algorithms. The kernel hyperplane approach has been considered for the computation.</p>
        </sec>
      </sec>
      <sec>
        <title>Classification Tasks</title>
        <p>Three classification tasks were considered for the analysis, as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>Classification of injury versus noninjury events (task A)</p>
          </list-item>
          <list-item>
            <p>Classification of intentional versus unintentional injury events (task B)</p>
          </list-item>
          <list-item>
            <p>Classification of unintentional injuries (task C) based on the WHO categorization (poisoning, road traffic, falls, fires and burns, drowning, and other unintentional injuries) [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Performance Evaluation</title>
        <p>The performance of the MLT classifiers was evaluated using cross-validated accuracy and Kappa agreement values compared to the gold standard. In particular, the training set represented by the gold standard is one of the largest such sets produced to classify injuries.</p>
        <p>For classification scenarios that involved severe class imbalance (where the minority class is represented by less than 15% of the cases), balanced accuracy was reported.</p>
        <p>The mean and maximum accuracies computed for all classes that included unintentional injuries in the gold standard records were also calculated. Other performance measures were reported concerning positive and negative predictive values, sensitivity, and specificity.</p>
      </sec>
      <sec>
        <title>MLT Predictions</title>
        <p>MLT predictions for classification tasks A, B, and C were calculated for admissions to the ED of children residing in Padua (221,175 records; <xref rid="figure1" ref-type="fig">Figure 1</xref>). Subsequently, Poisson 95% CIs for injury incidence rates over the Padova province resident child (aged 0-18 years) population were computed to compare the predictions of the different MLT methods. The person-time was identified in the period of 2007-2018 by considering the official Italian statistical data source, ISTAT [<xref ref-type="bibr" rid="ref25">25</xref>]. The number of cases in the period was estimated using the RF, GBM, DT, and SVM algorithms.</p>
      </sec>
      <sec>
        <title>Synthesis of Data</title>
        <p>Summary statistics of the gold-standard case data were reported as follows: continuous data were summarized as first quartile, median, and third quartile; categorical data were reported as percentages and absolute frequencies. Wilcoxon-type tests were performed for continuous variables, and Pearson chi-square test or Fisher exact test, as appropriate, were performed for categorical variables.</p>
        <p>The computations were performed using R 3.4.2 (R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref26">26</xref>] with the caret package [<xref ref-type="bibr" rid="ref27">27</xref>] as a machine learning R interface and the rms package [<xref ref-type="bibr" rid="ref28">28</xref>] for descriptive and standard statistical analyses.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Gold Standard Description</title>
        <p>The gold standard set used to train the MLT classifiers (39,576 cases) was composed of 19,659 female and 19,917 male individual admissions (<xref ref-type="table" rid="table1">Table 1</xref>). The sample was mainly composed of Italians, and 33,474 (85%) gold standard cases were aged between 1 month and 15 years (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p>Injury events were mainly represented by Italian male children between 6 and 15 years of age (<xref ref-type="table" rid="table1">Table 1</xref>). Among the 8232 injuries in <xref ref-type="table" rid="table1">Table 1</xref>, only 50 (0.6%) cases were intentional injuries.</p>
        <p>Manually classified WHO unintentional injury drowning cases were not considered in the analyses because there were only 12 such cases (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Falls and road traffic injuries were the main types of unintentional cases in the gold standard set (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of the gold standard cases. Continuous data are reported as medians (first and third quartiles); categorical data are reported as percentages and absolute frequencies. Wilcoxon-type tests were performed for continuous variables; Pearson chi-square test or Fisher exact test, as appropriate, were performed for categorical variables.</p>
          </caption>
          <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
            <col width="30"/>
            <col width="300"/>
            <col width="210"/>
            <col width="180"/>
            <col width="190"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>Noninjury (N=31,344), n (%)</td>
                <td>Injury (N=8232), n (%)</td>
                <td>Overall (N=39,576), n (%)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Gender</bold>
                </td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>15,762 (50)</td>
                <td>3897 (47)</td>
                <td>19,659 (50)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>15,582 (50)</td>
                <td>4335 (53)</td>
                <td>19,917 (50)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Age</bold>
                </td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>1-28 days</td>
                <td>4056 (13)</td>
                <td>103 (1)</td>
                <td>4159 (11)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>29 days-1 year</td>
                <td>6035 (19)</td>
                <td>702 (9)</td>
                <td>6737 (17)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>1-3 years</td>
                <td>5293 (17)</td>
                <td>1388 (17)</td>
                <td>6681 (17)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>4-5 years</td>
                <td>5270 (17)</td>
                <td>1387 (17)</td>
                <td>6657 (17)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>6-10 years</td>
                <td>4720 (15)</td>
                <td>1945 (24)</td>
                <td>6665 (17)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>11-15 years</td>
                <td>4170 (13)</td>
                <td>2564 (31)</td>
                <td>6734 (17)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>≥16 years</td>
                <td>1800 (6)</td>
                <td>143 (2)</td>
                <td>1943 (5)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Nationality</bold>
                </td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Other countries</td>
                <td>14,875 (47)</td>
                <td>3395 (41)</td>
                <td>18,270 (46)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Italian</td>
                <td>16,469 (53)</td>
                <td>4837 (59)</td>
                <td>21,306 (54)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Season</bold>
                </td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Spring</td>
                <td>7590 (24)</td>
                <td>2114 (26)</td>
                <td>9704 (25)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Summer</td>
                <td>7540 (24)</td>
                <td>2381 (29)</td>
                <td>9921 (25)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Autumn</td>
                <td>7783 (25)</td>
                <td>2175 (26)</td>
                <td>9958 (25)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Winter</td>
                <td>8431 (27)</td>
                <td>1562 (19)</td>
                <td>9993 (25)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Manual classification of events</bold>
                </td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Noninjury</td>
                <td>31,344 (100)</td>
                <td>—<sup>a</sup></td>
                <td>31,344 (79)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Injury (intentional)</td>
                <td>—</td>
                <td>50 (1)</td>
                <td>50 (0)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Injury (unintentional: poisoning)</td>
                <td>—</td>
                <td>176 (2)</td>
                <td>176 (0)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Injury (unintentional: falls)</td>
                <td>—</td>
                <td>589 (7)</td>
                <td>589 (1)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Injury (unintentional: road traffic)</td>
                <td>—</td>
                <td>520 (6)</td>
                <td>520 (1)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Injury (unintentional: fires and burns)</td>
                <td>—</td>
                <td>194 (2)</td>
                <td>194 (0)</td>
                <td/>
              </tr>
              <tr valign="top">
                <td/>
                <td>Injury (unintentional: other)</td>
                <td>—</td>
                <td>6703 (81)</td>
                <td>6703 (17)</td>
                <td/>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Free-Text Diagnosis Description</title>
        <p>The free-text diagnosis field of the gold standard cases was preprocessed, and the DTM was synthesized based on word occurrence. A manual frequency evaluation of noninjury diagnoses found the most frequent words were “high,” “respiratory,” “tract,” “inflammatory,” and “fever,” whereas the most frequent words in injury diagnoses were “skull,” “trauma,” “wound,” “fracture,” and “hand” (Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>Regarding the unintentional injury classes (Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), “trauma” was the most frequent word found in falls, road traffic, and other unintentional injury diagnoses.</p>
        <p>Burn events were mainly described with hand-related attributes such as “right” and “left.” No drowning events were reported.</p>
        <p>The words “ingestion” and “suspected” were mainly associated with poisoning diagnoses.</p>
      </sec>
      <sec>
        <title>MLT Classifier Performance</title>
        <sec>
          <title>Classification Task A: Injury Versus Noninjury</title>
          <p>The average cross-validated accuracy reported as percentages for the different MLT classifiers was greater than 85% in every case and was very similar for the SVM, GBM, and RF models (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
          <p>A high level of agreement among the methods can be found using the kappa measure, which was greater than 79% in every case with the exception of DT (<xref ref-type="table" rid="table2">Table 2</xref>). In addition, the kappa performance of the RF and SVM methods was very similar.</p>
          <p>Most disagreement cases stemmed from a mismatch with the gold standard, and the methods exhibited a high level of consistency in this regard. For example, the percentage of correctly evaluated injuries was very similar for the SVM and RF disagreement cases (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Injury versus noninjury classification task comparative cross-validated accuracies and kappa scores of the machine learning technique (MLT) classifiers.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="240"/>
              <col width="170"/>
              <col width="150"/>
              <col width="230"/>
              <col width="210"/>
              <thead>
                <tr valign="top">
                  <td>Feature</td>
                  <td>Random forest</td>
                  <td>Decision tree</td>
                  <td>Gradient boosting method</td>
                  <td>Support vector machine</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Overall accuracy (%)</td>
                  <td>94.09</td>
                  <td>88.38</td>
                  <td>94.1</td>
                  <td>94.14</td>
                </tr>
                <tr valign="top">
                  <td>Overall kappa (%)</td>
                  <td>81.76</td>
                  <td>58.66</td>
                  <td>81.63</td>
                  <td>81.77</td>
                </tr>
                <tr valign="top">
                  <td>Sensitivity</td>
                  <td>0.980475</td>
                  <td>0.993396</td>
                  <td>0.985065</td>
                  <td>0.983218</td>
                </tr>
                <tr valign="top">
                  <td>Specificity</td>
                  <td>0.795633</td>
                  <td>0.381413</td>
                  <td>0.7518</td>
                  <td>0.790492</td>
                </tr>
                <tr valign="top">
                  <td>Positive predictive value</td>
                  <td>0.945387</td>
                  <td>0.852819</td>
                  <td>0.934727</td>
                  <td>0.944237</td>
                </tr>
                <tr valign="top">
                  <td>Negative predictive value</td>
                  <td>0.918657</td>
                  <td>0.941199</td>
                  <td>0.933117</td>
                  <td>0.928852</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Classification Task B: Intentional Versus Unintentional Injury</title>
          <p>The balanced accuracy performance was greater than 70% for RF, GBM, DT, and SVM (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
          <p>Considering the other metrics (ie, sensitivity, specificity, negative predictive values, and positive predictive values), the algorithms were able to classify unintentional cases, as the negative predictive values were greater than 99% in every unintentional case. Intentional injuries, in contrast, were misclassified in several cases, and the positive predictive values were less than 2% in every case (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Intentional versus unintentional injury classification task comparative cross-validated performance measures of the machine learning technique (MLT) classifiers.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="230"/>
              <col width="170"/>
              <col width="170"/>
              <col width="220"/>
              <col width="210"/>
              <thead>
                <tr valign="top">
                  <td>Feature</td>
                  <td>Random forest</td>
                  <td>Decision tree</td>
                  <td>Gradient boosting method</td>
                  <td>Support vector machine</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Balanced accuracy (%)</td>
                  <td>71.33</td>
                  <td>75.63</td>
                  <td>76.65</td>
                  <td>70.86</td>
                </tr>
                <tr valign="top">
                  <td>Sensitivity</td>
                  <td>0.6867</td>
                  <td>0.7</td>
                  <td>0.7644</td>
                  <td>0.64</td>
                </tr>
                <tr valign="top">
                  <td>Specificity</td>
                  <td>0.7401</td>
                  <td>0.7787</td>
                  <td>0.7563</td>
                  <td>0.7631</td>
                </tr>
                <tr valign="top">
                  <td>Positive predictive value</td>
                  <td>0.0151</td>
                  <td>0.018</td>
                  <td>0.0178</td>
                  <td>0.0154</td>
                </tr>
                <tr valign="top">
                  <td>Negative predictive value</td>
                  <td>0.9976</td>
                  <td>0.9978</td>
                  <td>0.9982</td>
                  <td>0.9973</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Classification Task C: Unintentional Injury Category</title>
          <p>The algorithms were trained and tuned on different subclasses of unintentional injuries, and the scores of a balanced accuracy measure were found to be relatively greater and similar for RF, SVM, and GBM, and smaller for DT (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
          <p>For the other metrics, all the algorithms correctly identified poisoning and other unspecified injuries. For these classes, the classification positive predictive values were greater than 60% in every case for the different MLT methods (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The metrics reported in Table S2 (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) also revealed decreased performance for the identification of trauma-related injuries, such as “burns,” “falls,” and “road traffic,” where the sensitivity was less than 15%.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Unintentional injury classification task (in every case, maximum accuracy is achieved for the poisoning class).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="270"/>
              <col width="160"/>
              <col width="150"/>
              <col width="220"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td>Feature</td>
                  <td>Random forest</td>
                  <td>Decision tree</td>
                  <td>Gradient boosting method</td>
                  <td>Support vector machine</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Maximum balanced accuracy (%)</td>
                  <td>91.42</td>
                  <td>81.09</td>
                  <td>91.63</td>
                  <td>91.62</td>
                </tr>
                <tr valign="top">
                  <td>Mean balanced accuracy (%)</td>
                  <td>64.59</td>
                  <td>59.26</td>
                  <td>65.07</td>
                  <td>64.95</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Predicted MLT Injury Incidence Rates</title>
        <p>For the 221,175 ED visits studied (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), the median age was 4 years, and the majority of the children were of Italian nationality (172,577, 78%). The estimated number of injury cases was similar for RF, GBM, and SVM but relatively lower for DT (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>The estimated incidence rates of ED entrance for Padova residents (2007-2018) were very similar across the GBM, RF, and SVM methods and slightly lower for DT (Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In Italy, unintentional injuries in children have been scarcely investigated. Padova Hospital is an important health center in Northeast Italy, characterized by a high number of daily ED visits due to unintentional injuries. Analysis of the Padova Hospital ED database represents a suitable starting point for the development of a reliable and generalizable epidemiological injury surveillance system.</p>
        <p>The free-text classification may improve the epidemiologic surveillance of pediatric ED injuries. However, the manual classification of free-text diagnoses is often time-consuming and requires highly trained clinicians [<xref ref-type="bibr" rid="ref29">29</xref>]. On the contrary, automated text classification approaches require relatively fixed data sources and can improve the efficiency and timeliness of ED surveillance systems [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>Several MLT methods are currently used to perform automatic text classification. The methodological comparison of different MLTs is useful to achieve a valid and accurate text classification [<xref ref-type="bibr" rid="ref30">30</xref>]. This study demonstrates that ensemble tree-based resampling methods (RF and GBM) and SVMs are consistent with each other [<xref ref-type="bibr" rid="ref31">31</xref>], reporting good classification accuracy [<xref ref-type="bibr" rid="ref11">11</xref>] over different classification tasks, as corroborated in the literature [<xref ref-type="bibr" rid="ref12">12</xref>]. DT is known to have high variance across different training or test sets drawn from the same data set because it is prone to overfitting. Moreover, the optimal choice of an MLT classifier should be integrated and tailored to gold-standard data characteristics, such as the number of classes, class imbalance, and the correlation structure of predictors. In the literature, ensemble methods (ie, RF and GBM) have been shown to be more robust in relation to these previously mentioned issues [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
        <p>Cross-validation is a useful method that limits overfitting and allows tuning of DT parameters to optimize model accuracy [<xref ref-type="bibr" rid="ref33">33</xref>]. The best classifier performance in this study was achieved on the task of identifying injury versus noninjury cases in ED visits (task A).</p>
        <p>Regarding surveillance, the implications of our results are clear; current injury surveillance systems are largely based on mortality or hospital discharge data [<xref ref-type="bibr" rid="ref13">13</xref>]. However, thousands of pediatric patients are treated in the EDs and subsequently discharged [<xref ref-type="bibr" rid="ref13">13</xref>]. In Italy and other European and newly developed countries, ED data often contain narratives and free text to describe patient diagnoses [<xref ref-type="bibr" rid="ref11">11</xref>]. Thus, an automated ED surveillance system, not requiring additional physician work, would be a suitable tool for comprehensive surveillance of childhood injuries. A negative predictive value of at least 99% was found to identify unintentional injuries; this indicates that there is a high probability that the cases identified as unintentional by this algorithm were unintentional. MLTs are capable of correctly classifying unintentional cases, which are highly prevalent injury events. In the literature [<xref ref-type="bibr" rid="ref34">34</xref>], unintentional injury ED visits were found nearly 20 times more than intentional injury ED visits in the United States, and the pattern is similar in European countries [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <p>The algorithms in this study performed poorly on the identification of intentional events. This poor performance was due to the lack of intentional injury cases (n=50, 0.6%) in the data [<xref ref-type="bibr" rid="ref19">19</xref>]. Other methods are needed to develop a more accurate free-text classifier for intentional injury events. Poor performance was also evident in the distinction of trauma-related injuries (eg, falls and road traffic injuries).</p>
        <p>MLTs (especially ensemble algorithms) have shown good classification performance in poisoning events. From an epidemiological perspective, poisoning events remain the third most common cause of unintentional injuries in Italy and Europe [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>Large EDs are important sources of surveillance for pediatric diseases, especially for trauma and injury-related issues, given that most of such events refer to these departments [<xref ref-type="bibr" rid="ref13">13</xref>]. However, the staff employed in such facilities often work in stressful situations, and the time and human resources to devote to data collection and accurate diagnosis coding may be very limited. In this general framework, our proposed MLT-based tool could facilitate the automatic classification of events for surveillance purposes. Once implemented, this algorithm could be easily improved by accumulating more data on less prevalent injury categories. It is hereby possible to obtain a general overview of the phenomenon on the territory by monitoring its epidemiological evolution over time. This system could facilitate the timely activation of intervention policies in the event of alarming concentrations of injury events.</p>
        <p>Moreover, it is also important to improve surveillance systems using classified ED data integrated with hospital discharge or mortality records to design effective injury prevention programs and interventions. In this general context, the proposed ML-based injury classification tool could be a first step toward addressing the burden of pediatric injuries from a new holistic perspective [<xref ref-type="bibr" rid="ref36">36</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One first limitation of this study is that the data used for injury classification provide little information on what happens between ED admission and the final diagnosis. Moreover, as the triage service is extremely operator dependent, human factors represent an important confounding aspect of injury classification.</p>
        <p>Another possible limitation is the small prevalence of certain types of injury, such as intentional injuries and drowning (among unintentional ones); this issue makes the algorithm’s performance on these types of events lacking. The injury and unintentional injury classifications constitute the leading classification task for this research; however, further research developments are needed to enrich the diagnosis data on these types of injuries and train the classification machine for a more refined surveillance tool. Moreover, the poorly represented classes of unintentional injuries constitute an issue to be deepened from a technical standpoint. Within this framework, further research is needed to develop algorithms tailored to handle severe class imbalance.</p>
        <p>Another point to explore is the generalization of the algorithm; the MLT performance may be influenced by a training process performed on diagnoses data retrieved from the same center, where the referring physicians could maintain the same writing style across the data set. For this reason, data from other centers would be needed to generalize the validity of the epidemiological tool. Despite this limitation, this tool constitutes a proof of concept of an epidemiological surveillance attempt performed using a machine trained on data from a large pediatric ED referral center in Northeast Italy.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This research paper reports an MLT-based free-text classification application conducted for the epidemiological surveillance of pediatric injuries. The algorithms have been trained considering the free-text diagnoses data of the Padova University Hospital ED unit, a large referral center in Northeast Italy.</p>
        <p>The results of this study, for the injury classification task, showed that MLTs are a promising tool for improving epidemiological surveillance, allowing for the characterization of pediatric injuries in the ED by considering the free-text diagnoses as data sources.</p>
        <p>The reported classification performance is satisfactory, especially for general injuries and unintentional injury classification. These research results could facilitate the surveillance of a phenomenon that is often not easy to identify. Moreover, the approach could save time for health professionals working in the ED in manually classifying diagnoses for research purposes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary figures.</p>
        <media xlink:href="publichealth_v9i1e44467_app1.docx" xlink:title="DOCX File , 127 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">DT</term>
          <def>
            <p>decision tree</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ED</term>
          <def>
            <p>emergency department</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GBM</term>
          <def>
            <p>gradient boosting method</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MLT</term>
          <def>
            <p>machine learning technique</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The database is provided from an administrative data source of the University Hospital of Padua. The data could be made available upon motivated request to the authors.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>DA, DG, and SB prepared the original draft. DA, GAB, GL, CL, FS, AF, and PB wrote, reviewed, and edited the manuscript. Formal analysis was conducted by DA. The methodology was designed by DA and DG. The study was supervised by DG and LDD.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alonge</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>UR</given-names>
            </name>
            <name name-style="western">
              <surname>Hyder</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Our shrinking globe: implications for child unintentional injuries</article-title>
          <source>Pediatr Clin North Am</source>
          <year>2016</year>
          <month>02</month>
          <volume>63</volume>
          <issue>1</issue>
          <fpage>167</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pcl.2015.08.009</pub-id>
          <pub-id pub-id-type="medline">26613695</pub-id>
          <pub-id pub-id-type="pii">S0031-3955(15)00149-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deal</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Gomby</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Zippiroli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Behrman</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Unintentional injuries in childhood: analysis and recommendations</article-title>
          <source>Future Child</source>
          <year>2000</year>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>4</fpage>
          <pub-id pub-id-type="doi">10.2307/1602823</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Judy</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Unintentional injuries in pediatrics</article-title>
          <source>Pediatr Rev</source>
          <year>2011</year>
          <month>10</month>
          <volume>32</volume>
          <issue>10</issue>
          <fpage>431</fpage>
          <lpage>8; quiz 439</lpage>
          <pub-id pub-id-type="doi">10.1542/pir.32-10-431</pub-id>
          <pub-id pub-id-type="medline">21965710</pub-id>
          <pub-id pub-id-type="pii">32/10/431</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
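          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sethi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Towner</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Vincenten</surname>
              <given-names>J</given-names>
            </name>
            <etal/>
          </person-group>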
          <source>European report on child injury prevention</source>
          <year>2008</year>
          <publisher-loc>Copenhagen</publisher-loc>
          <publisher-name>World Health Organization. Regional Office for Europe</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>WHO</article-title>
          <source>World report on child injury prevention</source>
          <access-date>2019-02-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/publications/i/item/9789241563574">https://www.who.int/publications/i/item/9789241563574</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krug</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Lozano</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The global burden of injuries</article-title>
          <source>Am J Public Health</source>
          <year>2000</year>
          <month>04</month>
          <volume>90</volume>
          <issue>4</issue>
          <fpage>523</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.90.4.523</pub-id>
          <pub-id pub-id-type="medline">10754963</pub-id>
          <pub-id pub-id-type="pmcid">PMC1446200</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nijman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Blatter</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>SM 04-1421 Can autotext recognition software for coding injuries replace manual coding? Findings from IDB/DISS in the Netherlands</article-title>
          <source>Inj Prev</source>
          <year>2018</year>
          <volume>24</volume>
          <issue>Suppl 2</issue>
          <fpage>A266</fpage>
          <pub-id pub-id-type="doi">10.1136/injuryprevention-2018-safety.736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>McClure</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>McKenzie</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Implementing the national priorities for injury surveillance</article-title>
          <source>Med J Aust</source>
          <year>2008</year>
          <month>04</month>
          <day>07</day>
          <volume>188</volume>
          <issue>7</issue>
          <fpage>405</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.5694/j.1326-5377.2008.tb01685.x</pub-id>
          <pub-id pub-id-type="medline">18393744</pub-id>
          <pub-id pub-id-type="pii">mit10302_fm</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hyder</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Sugerman</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Puvanachandra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Razzak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>El-Sayed</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Isaza</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Peden</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Global childhood unintentional injury surveillance in four cities in developing countries: a pilot study</article-title>
          <source>Bull World Health Organ</source>
          <year>2009</year>
          <month>05</month>
          <volume>87</volume>
          <issue>5</issue>
          <fpage>345</fpage>
          <lpage>52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19551252"/>
          </comment>
          <pub-id pub-id-type="doi">10.2471/blt.08.055798</pub-id>
          <pub-id pub-id-type="medline">19551252</pub-id>
          <pub-id pub-id-type="pii">S0042-96862009000500011</pub-id>
          <pub-id pub-id-type="pmcid">PMC2678776</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochdorn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzoni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Francavilla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baldas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Berchialla</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alves</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Gregori</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Azzolina</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Monitoring public perception of health risks in Brazil and Italy: cross-cultural research on the risk perception of choking in children</article-title>
          <source>Children (Basel)</source>
          <year>2021</year>
          <month>06</month>
          <day>24</day>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>541</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/2318/1807906"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/children8070541</pub-id>
          <pub-id pub-id-type="medline">34202693</pub-id>
          <pub-id pub-id-type="pii">children8070541</pub-id>
          <pub-id pub-id-type="pmcid">PMC8307887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lorenzoni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bressan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lanera</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Azzolina</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Da Dalt</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gregori</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Analysis of unstructured text-based data using machine learning techniques: the case of pediatric emergency department records in Nicaragua</article-title>
          <source>Med Care Res Rev</source>
          <year>2021</year>
          <month>04</month>
          <day>29</day>
          <volume>78</volume>
          <issue>2</issue>
          <fpage>138</fpage>
          <lpage>145</lpage>
          <pub-id pub-id-type="doi">10.1177/1077558719844123</pub-id>
          <pub-id pub-id-type="medline">31030615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vallmuur</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nanda</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lehto</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>347 Using machine learning to categorise Emergency Department data for product safety surveillance</article-title>
          <source>Inj Prev</source>
          <year>2016</year>
          <month>09</month>
          <day>01</day>
          <volume>22</volume>
          <issue>Suppl 2</issue>
          <fpage>A127.2</fpage>
          <lpage>A127</lpage>
          <pub-id pub-id-type="doi">10.1136/injuryprev-2016-042156.347</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adirim</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lomax</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Injury surveillance in a pediatric emergency department</article-title>
          <source>Am J Emerg Med</source>
          <year>1999</year>
          <month>10</month>
          <volume>17</volume>
          <issue>6</issue>
          <fpage>499</fpage>
          <lpage>503</lpage>
          <pub-id pub-id-type="doi">10.1016/s0735-6757(99)90184-5</pub-id>
          <pub-id pub-id-type="medline">10530522</pub-id>
          <pub-id pub-id-type="pii">S0735-6757(99)90184-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sebastiani</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in automated text categorization</article-title>
          <source>ACM Comput Surv</source>
          <year>2002</year>
          <month>03</month>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1145/505282.505283</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morrison</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Doraiswamy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsay</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Injury surveillance in an accident and emergency department: a year in the life of CHIRPP</article-title>
          <source>Arch Dis Child</source>
          <year>1999</year>
          <month>06</month>
          <volume>80</volume>
          <issue>6</issue>
          <fpage>533</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://adc.bmj.com/lookup/pmidlookup?view=long&#38;pmid=10332002"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/adc.80.6.533</pub-id>
          <pub-id pub-id-type="medline">10332002</pub-id>
          <pub-id pub-id-type="pmcid">PMC1717950</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ocagli</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Azzolina</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bressan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bottigliengo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Settin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzoni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gregori</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Da Dalt</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology and trends over time of foreign body injuries in the pediatric emergency department</article-title>
          <source>Children (Basel)</source>
          <year>2021</year>
          <month>10</month>
          <day>19</day>
          <volume>8</volume>
          <issue>10</issue>
          <fpage>938</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=children8100938"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/children8100938</pub-id>
          <pub-id pub-id-type="medline">34682203</pub-id>
          <pub-id pub-id-type="pii">children8100938</pub-id>
          <pub-id pub-id-type="pmcid">PMC8534431</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sminkey</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>World report on child injury prevention</article-title>
          <source>Inj Prev</source>
          <year>2008</year>
          <month>02</month>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>69</fpage>
          <lpage>69</lpage>
          <pub-id pub-id-type="doi">10.1136/ip.2007.018143</pub-id>
          <pub-id pub-id-type="medline">18245322</pub-id>
          <pub-id pub-id-type="pii">14/1/69</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Passali</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gregori</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzoni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cocca</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Loglisci</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Passali</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Bellussi</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Foreign body injuries in children: a review</article-title>
          <source>Acta Otorhinolaryngol Ital</source>
          <year>2015</year>
          <month>10</month>
          <volume>35</volume>
          <issue>4</issue>
          <fpage>265</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="medline">26824213</pub-id>
          <pub-id pub-id-type="pmcid">PMC4731891</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lanera</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Minto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gregori</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Berchialla</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Baldi</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Extending PubMed searches to ClinicalTrials.gov through a machine learning approach for systematic reviews</article-title>
          <source>J Clin Epidemiol</source>
          <year>2018</year>
          <month>11</month>
          <volume>103</volume>
          <fpage>22</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2018.06.015</pub-id>
          <pub-id pub-id-type="medline">29981872</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(18)30085-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moss</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Leslie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rayson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Using JK fold cross validation to reduce variance when tuning NLP models</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online Jun 19, 2018</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1806.07139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Olshen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>Classification and regression trees</source>
          <year>1984</year>
          <publisher-loc>Monterey, CA</publisher-loc>
          <publisher-name>Wadsworth &#38; Brooks/Cole Advanced Books &#38; Software</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Flaxman</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Vahdatpour</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>CJ</given-names>
            </name>
            <collab>Population Health Metrics Research Consortium (PHMRC)</collab>
          </person-group>
          <article-title>Random forests for verbal autopsy analysis: multisite validation study using clinical diagnostic gold standards</article-title>
          <source>Popul Health Metr</source>
          <year>2011</year>
          <month>08</month>
          <day>04</day>
          <volume>9</volume>
          <fpage>29</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pophealthmetrics.biomedcentral.com/articles/10.1186/1478-7954-9-29"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1478-7954-9-29</pub-id>
          <pub-id pub-id-type="medline">21816105</pub-id>
          <pub-id pub-id-type="pii">1478-7954-9-29</pub-id>
          <pub-id pub-id-type="pmcid">PMC3160922</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liaw</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Classification and regression by randomForest</article-title>
          <source>R News</source>
          <year>2002</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>18</fpage>
          <lpage>22</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <month>09</month>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>297</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>ISTAT Demography</source>
          <access-date>2023-05-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://demo.istat.it/">http://demo.istat.it/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>The R Project for Statistical Computing</source>
          <access-date>2023-05-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.R-project.org">https://www.R-project.org</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Building predictive models in R using the caret package</article-title>
          <source>J Stat Softw</source>
          <year>2008</year>
          <volume>28</volume>
          <issue>5</issue>
          <fpage>1</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v028.i05</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <source>Regression Modeling Strategies: With Applications to Linear Models, Logistic Regression, and Survival Analysis</source>
          <year>2001</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maenner</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yeargin-Allsopp</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Naarden Braun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Schieve</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Development of a machine learning algorithm for the surveillance of autism spectrum disorder</article-title>
          <source>PLoS One</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>12</issue>
          <fpage>e0168224</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0168224"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0168224</pub-id>
          <pub-id pub-id-type="medline">28002438</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-07049</pub-id>
          <pub-id pub-id-type="pmcid">PMC5176307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kononenko</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for medical diagnosis: history, state of the art and perspective</article-title>
          <source>Artif Intell Med</source>
          <year>2001</year>
          <month>08</month>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>89</fpage>
          <lpage>109</lpage>
          <pub-id pub-id-type="doi">10.1016/s0933-3657(01)00077-x</pub-id>
          <pub-id pub-id-type="medline">11470218</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(01)00077-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Aljahdali</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Naimatullah Hussain</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Comparative prediction performance with support vector machine and random forest classification techniques</article-title>
          <source>IJCA</source>
          <year>2013</year>
          <month>05</month>
          <day>17</day>
          <volume>69</volume>
          <issue>11</issue>
          <fpage>12</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.5120/11885-7922</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Performance of corporate bankruptcy prediction models on imbalanced dataset: The effect of sampling methods</article-title>
          <source>Knowl Based Syst</source>
          <year>2013</year>
          <month>03</month>
          <volume>41</volume>
          <fpage>16</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1016/j.knosys.2012.12.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>The elements of statistical learning</source>
          <year>2009</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>745</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ballesteros</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mack</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sleet</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The epidemiology of unintentional and violence-related injury morbidity and mortality among children and adolescents in the United States</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2018</year>
          <month>03</month>
          <day>28</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>616</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph15040616"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph15040616</pub-id>
          <pub-id pub-id-type="medline">29597289</pub-id>
          <pub-id pub-id-type="pii">ijerph15040616</pub-id>
          <pub-id pub-id-type="pmcid">PMC5923658</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rockett</surname>
              <given-names>IR</given-names>
            </name>
            <name name-style="western">
              <surname>Regier</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Kapusta</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Coben</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Hanzlick</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Todd</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Sattin</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Kleinig</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>GS</given-names>
            </name>
          </person-group>
          <article-title>Leading causes of unintentional and intentional injury mortality: United States, 2000–2009</article-title>
          <source>Am J Public Health</source>
          <year>2012</year>
          <month>11</month>
          <volume>102</volume>
          <issue>11</issue>
          <fpage>e84</fpage>
          <lpage>e92</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2012.300960</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elboray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Elawdy</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Dewedar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Elezz</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>El-Setouhy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Hirshon</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Knowledge, attitudes, and practices of family physicians and nurses regarding unintentional injuries among children under 15 years in Cairo, Egypt</article-title>
          <source>Int J Inj Contr Saf Promot</source>
          <year>2017</year>
          <month>03</month>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26176681"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/17457300.2015.1056808</pub-id>
          <pub-id pub-id-type="medline">26176681</pub-id>
          <pub-id pub-id-type="pmcid">PMC4714960</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
