<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e50379</article-id>
      <article-id pub-id-type="pmid">38190245</article-id>
      <article-id pub-id-type="doi">10.2196/50379</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Tutorial</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Tutorial</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Generating Contextual Variables From Web-Based Data for Health Research: Tutorial on Web Scraping, Text Mining, and Spatial Overlay Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Levitz</surname>
            <given-names>Nicole</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rashidian</surname>
            <given-names>Laleh</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Galvez-Hernandez</surname>
            <given-names>Pablo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Institute of Health Policy, Management and Evaluation</institution>
            <institution>Dalla Lana School of Public Health</institution>
            <institution>University of Toronto</institution>
            <addr-line>Health Sciences Building, 4th Fl.</addr-line>
            <addr-line>155 College St</addr-line>
            <addr-line>Toronto, ON, M5T 3M6</addr-line>
            <country>Canada</country>
            <phone>1 6475752195</phone>
            <email>pau.galvez@utoronto.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6268-559X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Gonzalez-Viana</surname>
            <given-names>Angelina</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4992-4039</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Gonzalez-de Paz</surname>
            <given-names>Luis</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4767-8121</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Shankardass</surname>
            <given-names>Ketan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8410-2201</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Muntaner</surname>
            <given-names>Carles</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2405-5802</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Lawrence S Bloomberg Faculty of Nursing</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institute of Health Policy, Management and Evaluation</institution>
        <institution>Dalla Lana School of Public Health</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Public Health Agency of Catalonia</institution>
        <institution>Health Department</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Primary Healthcare Transversal Research Group</institution>
        <institution>Institut d’Investigacions Biomèdiques August Pi i Sunyer</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Consorci d'Atenció Primària de Salut Barcelona Esquerra</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Health Sciences</institution>
        <institution>Wilfrid Laurier University</institution>
        <addr-line>Waterloo, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>MAP Centre for Urban Health Solutions</institution>
        <institution>Li Ka Shing Knowledge Institute</institution>
        <institution>St Michael’s Hospital</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Dalla Lana School of Public Health</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Pablo Galvez-Hernandez <email>pau.galvez@utoronto.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>8</day>
        <month>1</month>
        <year>2024</year>
      </pub-date>
      <volume>10</volume>
      <elocation-id>e50379</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>20</day>
          <month>11</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>11</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Pablo Galvez-Hernandez, Angelina Gonzalez-Viana, Luis Gonzalez-de Paz, Ketan Shankardass, Carles Muntaner. Originally published in JMIR Public Health and Surveillance (https://publichealth.jmir.org), 08.01.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on https://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2024/1/e50379" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Contextual variables that capture the characteristics of delimited geographic or jurisdictional areas are vital for health and social research. However, obtaining data sets with contextual-level data can be challenging in the absence of monitoring systems or public census data.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We describe and implement an 8-step method that combines web scraping, text mining, and spatial overlay analysis (WeTMS) to transform extensive text data from government websites into analyzable data sets containing contextual data for jurisdictional areas.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This tutorial describes the method and provides resources for its application by health and social researchers. We used this method to create data sets of health assets aimed at enhancing older adults’ social connections (eg, activities and resources such as walking groups and senior clubs) across the 374 health jurisdictions in Catalonia from 2015 to 2022. These assets are registered on a web-based government platform by local stakeholders from various health and nonhealth organizations as part of a national public health program. Steps 1 to 3 involved defining the variables of interest, identifying data sources, and using Python to extract information from 50,000 websites linked to the platform. Steps 4 to 6 comprised preprocessing the scraped text, defining new variables to classify health assets based on social connection constructs, analyzing word frequencies in titles and descriptions of the assets, creating topic-specific dictionaries, implementing a rule-based classifier in R, and verifying the results. Steps 7 and 8 integrated the spatial overlay analysis to determine the geographic location of each asset. We conducted a descriptive analysis of the data sets to report the characteristics of the assets identified and the patterns of asset registrations across areas.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We identified and extracted data from 17,305 websites describing health assets. The titles and descriptions of the activities and resources contained 12,560 and 7301 unique words, respectively. After applying our classifier and spatial analysis algorithm, we generated 2 data sets containing 9546 health assets (5022 activities and 4524 resources) with the potential to enhance social connections among older adults. Stakeholders from 318 health jurisdictions registered identified assets on the platform between July 2015 and December 2022. The agreement rate between the classification algorithm and verified data sets ranged from 62.02% to 99.47% across variables. Leisure and skill development activities were the most prevalent (1844/5022, 36.72%). Leisure and cultural associations, such as social clubs for older adults, were the most common resources (878/4524, 19.41%). Health asset registration varied across areas, ranging between 0 and 263 activities and 0 and 265 resources.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The sequential use of WeTMS offers a robust method for generating data sets containing contextual-level variables from internet text data. This study can guide health and social researchers in efficiently generating ready-to-analyze data sets containing contextual variables.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>web scraping</kwd>
        <kwd>text mining</kwd>
        <kwd>spatial overlay analysis</kwd>
        <kwd>program evaluation</kwd>
        <kwd>social environment</kwd>
        <kwd>contextual variables</kwd>
        <kwd>health assets</kwd>
        <kwd>social connection</kwd>
        <kwd>multilevel analysis</kwd>
        <kwd>health services research</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Contextual variables refer to the social or physical attributes of geographic or jurisdictional areas (eg, country, city, neighborhood, and administrative health area) that are not derived from the characteristics of their members [<xref ref-type="bibr" rid="ref1">1</xref>]. Common examples include social cohesion [<xref ref-type="bibr" rid="ref2">2</xref>], social capital [<xref ref-type="bibr" rid="ref3">3</xref>], and presence of green spaces [<xref ref-type="bibr" rid="ref4">4</xref>]. Contextual variables have multiple applications in health and social research. As people living in the same community or context are likely to be exposed to a similar environment, contextual variables can be used in multilevel models to explain variability in health outcomes [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>Although information on some contextual variables, such as census data, is widely available, accessing context-level data in emerging research fields can pose significant challenges. For example, monitoring systems may not exist yet to fully capture the social determinants of health (SDOH) across delimited areas. In addition, there may not be data available on the exposure and implementation of large-scale interventions targeting SDOH, making program and implementation evaluation studies challenging or impossible [<xref ref-type="bibr" rid="ref6">6</xref>]. This could be the case for regional or state public policies and public health programs, such as provincial public health programs to promote local intersectoral collaborations to tackle SDOH [<xref ref-type="bibr" rid="ref7">7</xref>] or national legislation to promote healthy nutrition to prevent obesity [<xref ref-type="bibr" rid="ref8">8</xref>]. As these policies and programs can be implemented without an evaluation plan, and data might be complex or unavailable, they often remain unevaluated [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>When structured databases or primary data gathering are not feasible, the internet can be a valuable resource for compiling information to define contextual variables. However, this presents several challenges: internet data are often cluttered, fragmented, and spread over multiple websites [<xref ref-type="bibr" rid="ref10">10</xref>]. Moreover, the content of most websites is not designed for use by health researchers nor is it grouped by relevant contextual areas. To overcome these challenges, we developed a novel 8-step method, which we have termed web scraping, text mining, and spatial overlay analysis (WeTMS), to collect large amounts of internet data from websites, transform them into meaningful data sets containing research-relevant variables, and classify them based on delimited geographical or jurisdictional areas.</p>
        <p>This method combines the techniques used in web scraping, text processing and mining, and spatial analysis. Web scraping, also known as web data mining, involves the creation of programs that can automatically download, parse, organize, and store information collected from the web in structured data sets [<xref ref-type="bibr" rid="ref11">11</xref>]. This process is more efficient and less prone to errors compared with the traditional and laborious process of manually copying and pasting internet information into a spreadsheet [<xref ref-type="bibr" rid="ref11">11</xref>]. Web scraping has been gaining traction in health research, fueling the rise of <italic>infodemiology</italic>, which analyzes the spread and impact of web-based information to inform public health and policy [<xref ref-type="bibr" rid="ref12">12</xref>]. As of January 2023, a search of the keyword “web scraping” in Medline yielded 105 records, 95 of which were published starting from 2019. Articles using web scraping in health and social research mostly used information from social media [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] (eg, Twitter, Instagram, and TikTok), forums [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], business and review websites [<xref ref-type="bibr" rid="ref17">17</xref>], and news web pages [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>Similarly, text mining has been increasingly applied in health and social research [<xref ref-type="bibr" rid="ref19">19</xref>]. Text mining is the process of extracting meaningful information from large volumes of unstructured text data using techniques such as text classification, sentiment analysis, and pattern recognition [<xref ref-type="bibr" rid="ref19">19</xref>]. Examples include using sentiment analysis on social media posts to identify health and mental well-being issues [<xref ref-type="bibr" rid="ref20">20</xref>] and characterizing mental health problems [<xref ref-type="bibr" rid="ref21">21</xref>]. In addition, topic modeling has been used to understand public perceptions of the COVID-19 pandemic on Twitter [<xref ref-type="bibr" rid="ref22">22</xref>] and to uncover health-related topics on social media [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <p>Spatial overlay analysis is a group of methodologies used in geographic information systems to simultaneously display multiple layers of spatial information and assess the relationships between different geographic features and attributes [<xref ref-type="bibr" rid="ref24">24</xref>]. Spatial overlay analysis can be used to examine the relationships between multiple layers of geospatial data to locate spatial points (eg, coordinates) in delimited geographic or jurisdictional areas. Geographic information system methods have been used in health geography and environmental epidemiology to study the geographic incidence or distribution of diseases [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>When census data or data sets containing contextual variables are unavailable, researchers may have to engage in laborious manual extraction of web-based data, which can be time-consuming and susceptible to inaccuracies [<xref ref-type="bibr" rid="ref11">11</xref>]. Currently, there is a gap in the literature regarding methods that enable researchers to automatically convert large volumes of internet text information into meaningful, ready-to-analyze data sets containing contextual data. We propose that by combining techniques used in WeTMS, researchers can efficiently extract, process, and geolocate vast amounts of internet text data to produce structured data sets that encompass variables reflecting the contextual characteristics of specific geographic or jurisdictional areas.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The aims of this study are 2-fold. First, we outline the implementation of the WeTMS method through a research case, creating data sets with contextual variables on health assets that could improve social connections for older adults across various health jurisdictions in Catalonia, Spain. Second, we analyze these data sets to describe the characteristics and registration trends of these health assets by local stakeholders.</p>
        <p>In this tutorial, we first introduce the WeTMS method and describe its application to a research case for compiling data sets of health assets that could enhance social connections among older adults in the health jurisdictions of Catalonia. These assets include activities and resources in the community that can facilitate social interaction, such as social activities, walking groups for retirees, libraries, and senior community centers [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <p>Next, we use these new data sets to extract assets with the potential to foster older adults’ social connections and conduct a descriptive analysis to explore their characteristics and asset registration trends across jurisdictions. This analysis demonstrates the potential application of this method in program evaluation. In addition, we discuss the challenges that health and social researchers may face during the WeTMS process and provide resources and programming codes to facilitate its application in other areas of research.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Context and Data Sources</title>
        <p>In 2015, the government of Catalonia launched the Assets and Health platform as a component of 2 provincial public health programs that aimed to promote intersectoral collaborations among health and nonhealth organizations to tackle complex public health issues, including older adults’ lack of social connections [<xref ref-type="bibr" rid="ref7">7</xref>]. The Assets and Health platform (created by the Asturias Health Observatory and shared through the Spanish Community Health Alliance) is a search engine and repository where stakeholders from multiple local organizations can register community health assets [<xref ref-type="bibr" rid="ref27">27</xref>]. Health assets are activities and resources within the community that contribute to maintaining the health and well-being of individuals and groups [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
        <p>Health assets were registered as “activities” (time-bound initiatives and structured interventions, like arts and crafts or supervised walking outings) and “resources” (permanent community fixtures such as associations, parks, and civic centers). Once registered, each health asset is stored on an individual website detailing characteristics, such as its title, description, location, and target population. These individual websites are linked to a search engine, enabling stakeholders to locate assets available in their basic health areas (BHAs), which can be used in collaborative interventions to address public health problems. Each BHA in Catalonia is a local health jurisdiction that functions as an administrative unit within the Catalan healthcare system [<xref ref-type="bibr" rid="ref29">29</xref>]. In urban settings, BHAs typically cover specific neighborhoods or districts, whereas in rural areas, they may span one or more municipalities, as determined by demographic, epidemiological, and accessibility considerations.</p>
      </sec>
      <sec>
        <title>Overview of the WeTMS Method</title>
        <p>The 8 steps of the proposed method are summarized in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The first 3 steps involve identifying and extracting website data through web scraping, and then storing the information in structured data sets to facilitate their analysis. Steps 4-6 describe the application of text processing and mining techniques to analyze the scraped data, identify patterns in the text content, and classify the data into new variables and categories. Steps 7 and 8 elaborate on the use of spatial overlay analysis to locate data within delimited geographic or jurisdictional areas.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the web scraping, text mining, and spatial overlay analysis (WeTMS) steps for generating contextual variables from unstructured web-based data.</p>
          </caption>
          <graphic xlink:href="publichealth_v10i1e50379_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Steps 1 to 3: Using Web Scraping for Data Extraction</title>
        <sec>
          <title>Step 1: Defining Variables of Interest</title>
          <p>The target information, including the type of data and the desired outputs for web scraping, was first outlined to avoid extracting irrelevant information. We aimed to generate context-level variables capturing the attributes and registration dates of community health assets to enhance social connections among older adults in Catalan BHAs. The data to be extracted from each website detailing a health asset included text elements such as title, description, target population, location, asset registration date, cost, duration, and activity topics. Other data types that could be targeted for extraction include images, links, and metadata, while outputs might be structured data sets such as CSV files, which contain prespecified variables.</p>
        </sec>
        <sec>
          <title>Step 2: Identifying and Exploring the Data Sources</title>
          <p>The second step involves identifying the URLs or web addresses containing the target information and understanding how their content is structured. Identifying URLs can present challenges such as information being dispersed across multiple websites or URLs being hidden or changing [<xref ref-type="bibr" rid="ref30">30</xref>]. Consequently, sites may be missed because the web scraping “crawler” (the portion of code responsible for finding each URL) requires exact web addresses [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
          <p>In our initial exploration of the Assets and Health search engine, only the last 100 registered health assets were displayed, and those URLs were hidden. A pattern in the URLs for websites describing each health asset, comprising a fixed segment and variable reference number, was identified using Chrome DevTools for network inspection.</p>
          <p>The source code of the target websites, usually HTML, was examined to discern their organization and structure. The attributes of HTML elements containing target data (eg, asset titles and descriptions) were identified and used to program the web scraper. Additional information and resources required to implement step 2 are available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref36">36</xref>].</p>
        </sec>
        <sec>
          <title>Step 3: Extracting and Parsing URL Source Data</title>
          <p>After identifying the relevant URLs and HTML elements, a web scraper comprising a crawler, parser, and data handler was developed using Python 3.10 [<xref ref-type="bibr" rid="ref37">37</xref>] in the PyCharm 2022.2.2 environment with the libraries “requests” [<xref ref-type="bibr" rid="ref38">38</xref>], “beautifulsoup4” [<xref ref-type="bibr" rid="ref39">39</xref>], and “pandas” [<xref ref-type="bibr" rid="ref40">40</xref>]. The web scraper also incorporated error-handling mechanisms to manage potential issues, such as connection failures when URLs were nonexistent. The code is explained in detail in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> and is publicly accessible on GitHub [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
          <p>The web crawler requested 50,000 URLs from websites linked to the Assets and Health platform, comprising 25,000 activities and 25,000 resources, to capture reference numbers from the onset of the program, from July 2015 to December 23, 2022, which was the day when the data were scraped. The program “parser” then analyzed the HTML code of each existing URL and extracted the desired elements, stripping the text, which was automatically stored in 2 CSV data sets for activities and resources. The encoding of the data sets was revised to avoid mismatches between the character set used to represent the text data and that of the scraped text, as this can result in certain characters being displayed as symbols. An initial review of scraped health assets was conducted to exclude irrelevant observations. We filtered out assets registered outside Catalonia or targeted solely at children and youth before proceeding with the text processing and mining steps.</p>
        </sec>
      </sec>
      <sec>
        <title>Steps 4 to 6: Text Processing and Mining to Generate Meaningful Contextual Variables</title>
        <sec>
          <title>Step 4: Cleaning and Preprocessing</title>
          <p>Cluttered and inconsistent text data obtained from web scraping were preprocessed for analysis [<xref ref-type="bibr" rid="ref31">31</xref>]. We used RStudio (version 2022.12.0), with the “tm” [<xref ref-type="bibr" rid="ref42">42</xref>] and “qdap” [<xref ref-type="bibr" rid="ref43">43</xref>] libraries. The “tm” library provides functions for cleaning, preprocessing, and analyzing text data. The “qdap” library allows text categorization, word frequency calculation, tokenization, and clustering.</p>
          <p>The first author manually examined a set of activities and resources from scraped text to assess the quality and structure of the text data. This step was crucial for identifying inconsistencies, such as assigning different age ranges (eg, 60 and 65 years) to older adults simultaneously.</p>
          <p>To preprocess the text data, the columns containing free text, namely titles and descriptions of the health assets, were merged and converted to “corpus” objects—a data structure for text data in R. Columns with text derived from fixed responses were not preprocessed. Irrelevant stop words were then removed, text data were segmented into individual units that could be transformed into numerical variables (tokenization), and words were normalized to their root form (stemming) [<xref ref-type="bibr" rid="ref44">44</xref>]. The 2 data sets, containing health assets registered as activities and resources, were processed independently. The code with explanations for this step can be accessed through GitHub [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
        </sec>
        <sec>
          <title>Step 5: Defining Dictionaries and Categorizing Text</title>
          <p>We used text mining techniques to develop a classification system that separates health assets with the potential to enhance older adults’ social connections from all other activities and resources registered on the platform and then categorizes them. Text classification is pivotal for the generation of new variables of interest from unstructured data because it can categorize text into predefined classes or labels.</p>
          <p>First, to classify health assets, we predefined new variables and categories created through a deductive approach, based on the literature on social connections [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. We also used inductive processes to create new variables and categories based on patterns identified during the text analysis and discussion among the research team. <xref ref-type="table" rid="table1">Table 1</xref> lists the new variables, categories, type of creation process, and literature sources. Detailed definitions of the new variables and categories are provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref48">48</xref>-<xref ref-type="bibr" rid="ref51">51</xref>].</p>
          <p>Second, we created document-term matrixes from the corpus of preprocessed text data containing health asset titles and descriptions. A document-term matrix is a mathematical matrix that describes the frequency of terms in a collection of textual data [<xref ref-type="bibr" rid="ref31">31</xref>]. The frequency of each word, representing the number of times a term appeared in the title and description of health assets, was calculated, and the words were sorted by frequency.</p>
          <p>Third, over the course of 3 meetings, 2 researchers (PG-H and CM) identified and selected high-frequency words that were repeated 15 times or more in the scraped data, grouped them into topic-specific dictionaries, and refined the list. Eligibility criteria, informed by the definitions of each new variable category, were developed to determine which words to include in each dictionary.</p>
          <p>Finally, a classification system was developed using a rule-based classifier. A rule-based classifier categorizes data into predefined classes by applying a set of human-defined rules and conditions based on the features and attributes of the data [<xref ref-type="bibr" rid="ref52">52</xref>]. We opted for a rule-based system over more complex machine learning classifiers, as this approach is better suited for scenarios with a limited number of specific labels and smaller data sets and ensures efficiency and interpretability without the need for extensive training data [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. Topic-specific dictionaries consist of lists of words related to the definitions of the predefined variable categories as conditions to classify the text data [<xref ref-type="bibr" rid="ref54">54</xref>]. An R function was then developed to automatically generate a new column for each new variable, search for dictionary words in the scraped data, and assign a new category value if a word was found. The classifier system, including topic-specific dictionaries, is accessible on GitHub [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>New variables and categories created for the classification system of health assets.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="110"/>
              <col width="490"/>
              <col width="210"/>
              <col width="160"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">New variables</td>
                  <td>Categories within each variable</td>
                  <td>Source columns from scraped text data</td>
                  <td>Creation process and literature sources</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Activities</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Activity type</td>
                  <td>Leisure and skill development, physical activity, social facilitation, psychological therapies, awareness campaigns, health and social care, and befriending</td>
                  <td>Title and description</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Format</td>
                  <td>Group and individual</td>
                  <td>Title and description</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref49">49</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Focus</td>
                  <td>Direct and indirect</td>
                  <td>Title and description</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref49">49</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Age</td>
                  <td>Children, youth, adults, older adults, general population, minors unspecified, and adults unspecified</td>
                  <td>Description, target population, and topics</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref50">50</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Gender</td>
                  <td>Women, men, nonbinary, and any</td>
                  <td>Description, target population, and topics</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref51">51</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Vulnerable populations</td>
                  <td>Migrants, caregivers, substance use, physical diseases, risk social exclusion, mental diseases, and all<sup>a</sup></td>
                  <td>Title, description, target population, and activity topics</td>
                  <td>Inductive</td>
                </tr>
                <tr valign="top">
                  <td colspan="5">
                    <bold>Resources</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Resource type</td>
                  <td>Municipal natural and green space, health institution, social welfare institution, education institution, patient advocacy group, charitable and voluntary organization, faith-based organization, parent school associations, public library, civic center, sports institution, leisure and cultural association, neighborhood association, and cultural institution</td>
                  <td>Title and description</td>
                  <td>Inductive</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Focus</td>
                  <td>Direct and indirect</td>
                  <td>Title and description</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref49">49</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Age</td>
                  <td>Children, youth, adults, older adults, general population, minors unspecified, and adults unspecified</td>
                  <td>Title, description, and topics</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref50">50</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Gender</td>
                  <td>Women, men, nonbinary, and any</td>
                  <td>Description and topics</td>
                  <td>Deductive [<xref ref-type="bibr" rid="ref51">51</xref>]</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Vulnerable populations</td>
                  <td>Migrants, caregivers, substance use, physical diseases, risk social exclusion, mental diseases, and all<sup>a</sup></td>
                  <td>Title, description, and topics</td>
                  <td>Inductive</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>“Substance use,” “physical diseases,” “risk social exclusion,” “mental diseases,” and “all” are simplified terms for target populations experiencing substance use, physical diseases, mental diseases, those at risk of social exclusion, and the general population.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Step 6: Manual Verification</title>
          <p>The categories assigned to the new variables for each health asset were reviewed for inconsistencies by 2 researchers with expertise in the topic (PG-H and Angeli Chacaliaza). Manual verification refers to a one-by-one examination of the classified data by human reviewers to ensure the accuracy of the new variables created [<xref ref-type="bibr" rid="ref55">55</xref>]. Verification involved an independent review of 200 health assets by 2 researchers to assess new variable categories based on eligibility criteria. Discrepancies were resolved through web meetings with manual reclassification if necessary. The data sets were then divided into groups of 500 health assets for independent review. Agreement rates between the verified and automatically generated variables were computed using the Excel software.</p>
        </sec>
      </sec>
      <sec>
        <title>Steps 7 and 8: Spatial Overlay Analysis to Locate Observations</title>
        <sec>
          <title>Step 7: Identifying the Layers of Spatial Data</title>
          <p>We used a spatial overlay analysis to generate a new variable that identified the BHA in which each health asset was located. The analysis was conducted in RStudio (version 2022.12.0) because of its many packages specifically designed for spatial overlay analysis, such as “sp,” [<xref ref-type="bibr" rid="ref56">56</xref>] “sf,” [<xref ref-type="bibr" rid="ref57">57</xref>] “rgdal,” [<xref ref-type="bibr" rid="ref58">58</xref>] “rgeos,” [<xref ref-type="bibr" rid="ref59">59</xref>] and “ggplot2” [<xref ref-type="bibr" rid="ref60">60</xref>].</p>
          <p>In this step, 2 spatial layers were identified. The first layer consisted of polygonal data depicting 374 BHAs in Catalonia. The data were obtained from the open database of the General Directorate of Health Planning and Research in Catalonia. Polygonal data can represent geographic or jurisdictional regions by defining their boundaries [<xref ref-type="bibr" rid="ref61">61</xref>]. The second layer comprised a vector of geographic point data for each health asset. Point data consisted of longitude and latitude coordinates obtained from the addresses scraped for each activity and resource using the Excel add-on GeoCode, a map tool that uses Google services to automatically retrieve longitudes and latitudes from addresses.</p>
        </sec>
        <sec>
          <title>Step 8: Matching Coordinate Reference System and Finding Intersection Points</title>
          <p>Step 8 involves transforming the spatial data layers into a common coordinate reference system (CRS) and identifying the intersecting points. The CRS of a spatial object determines its location on the Earth’s surface. Thus, analyzing 2 or more spatial layers with different CRS can produce misleading outcomes [<xref ref-type="bibr" rid="ref61">61</xref>]. To identify areas of overlap between health asset coordinates and BHAs, the following steps were taken: (1) coordinates were transformed to a simple feature object format, (2) simple feature objects were converted into single points using the “st_point” function, and (3) both spatial data layers were converted to a common CRS.</p>
          <p>Finally, a spatial overlay analysis was performed using the “st_intersects” function to determine the BHA polygons with which each health asset point data intersected. The function was applied in a loop to each row of the activity and resource data sets. The resulting outputs are stored in new columns named “Code_BHA” and “Name_BHA.” The code, along with explanations for steps 7 and 8, is available in GitHub [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Data Set Filtering and Descriptive Analysis</title>
        <p>The new data sets were filtered using the new variables and categories to select health assets with the potential to foster social connections among older adults from all scraped assets. Eligible health assets registered as activities and resources were included if (1) the target population included older adults, (2) the format was either group activities or individual activities fostering social connections (eg, befriending), and (3) they were located in Catalonia.</p>
        <p>A descriptive analysis was conducted in RStudio (version 2022.12.0) to understand the characteristics and asset registration trends of stakeholders across BHAs. Frequencies and proportions were calculated for each category of the new variables (activity type, format, focus, age, gender, and vulnerable populations). Temporal registration trends of activities and resources were analyzed using time-series graphs with local polynomial regression fitting lines, a nonparametric method used to describe the deterministic variation in data [<xref ref-type="bibr" rid="ref62">62</xref>]. We also computed the average weekly registration of activities and resources in each BHA, assuming a Poisson distribution, where λ represents the weekly health asset registrations per area. Finally, visualization techniques were used to analyze the temporal evolution of the registration of activities and resources on the Assets and Health websites across BHAs, as well as their geographic distribution.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The data collected in this study were publicly accessible and did not contain any personal or sensitive information. Thus, ethical approval and participant consent were not required for this study. In addition, before data collection, we verified that the websites of interest did not have any explicit prohibitions against automatic web scraping, such as a “robots.txt” file or similar declarations.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Results From WeTMS</title>
        <sec>
          <title>Web Scraping</title>
          <p>Of the 50,000 URLs inspected, 17,305 contained websites describing health assets (9558 activities and 7747 resources) registered with local stakeholders from July 2015 to December 2022. The number of observations obtained through web scraping matched the total number of assets reported on the Assets and Health platform, thus demonstrating the efficacy of the web scraper. No missing values were detected for the main variables (eg, title, description, location, and date of asset registration). In the activity data set, 9.56% (480/5022) of observations did not disclose the <italic>activity cost</italic>, and 49.04% (2463/5022) did not report the <italic>activity duration</italic>. An example of an activity and resource, as they appear in the scraped data sets, is provided in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
          <boxed-text id="box1" position="float">
            <title>Example of a health asset registered as activity and resource extracted from the scraped text (English translation).</title>
            <p>
              <bold>Activity row #860</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Title: School for Adults</p>
              </list-item>
              <list-item>
                <p>Description: Reading and writing classes</p>
              </list-item>
              <list-item>
                <p>Population: Over 65 years old—anyone (district neighbors over 65 years old)</p>
              </list-item>
              <list-item>
                <p>Location: Campoamor Street 92, 08204, Civic Center Rogelio Soto, Sabadell, Barcelona, Catalonia, Spain</p>
              </list-item>
              <list-item>
                <p>Organizations: Civic Center Rogelio Soto, Campoamor Neighborhood Association</p>
              </list-item>
              <list-item>
                <p>Registration date: February 6, 2020</p>
              </list-item>
              <list-item>
                <p>Is free: Yes</p>
              </list-item>
              <list-item>
                <p>Categories: Women, older adults, people at risk of exclusion, school of health, mental health, or emotional well-being</p>
              </list-item>
              <list-item>
                <p>Time activity: From September 13, 2019, to June 30, 2020</p>
              </list-item>
            </list>
            <p>
              <bold>Resource row #1961</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Title: Association of Retirees and Pensioners, La Pineda</p>
              </list-item>
              <list-item>
                <p>Description: Association that aims to promote cultural training and sports activities for older adults, as well as avoiding loneliness and social isolation, fostering relationships between them</p>
              </list-item>
              <list-item>
                <p>Registration date: May 15, 2017</p>
              </list-item>
              <list-item>
                <p>Location: Alfredo Kraus Street 20, 43481, La Pineda Vila-seca, Tarragona, Catalonia, Spain</p>
              </list-item>
              <list-item>
                <p>Categories: Older adults, mental health or emotional well-being, physical activity, community health</p>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>Text Mining</title>
          <p>From the text processing of the corpus of titles and descriptions, a total of 12,560 tokens (or raw words) were identified for activities and 7301 for resources, of which 996 (7.9%) and 594 (8.1%) words had a frequency &gt;15. Using words with a frequency of &gt;15, we constructed 73 topic-specific dictionaries corresponding to each category of the new variables. For instance, for the <italic>physical activity</italic> category under the <italic>activity type</italic> variable, the topic-specific dictionary included words such as “physical,” “exercise,” “gym,” “yoga,” and “sport.” <xref rid="figure2" ref-type="fig">Figure 2</xref> presents popular dictionary words for each category within the <italic>activity type</italic> variable.</p>
          <p>After applying the rule-based classifier using topic-specific dictionaries, manual verification of the output yielded variable levels of agreement ranging from 62.02% (3417/5509) to 99.47% (4886/4912) across variables. Variables with lower classification accuracy had a larger number of possible categories, a more evenly distributed number of observations across categories, or words repeated fewer than 15 times within the title and description corpus. The agreement rates between the verified and automatically generated databases are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Categories within the “activity type” variable showcasing popular words derived from topic-specific dictionaries (English translation).</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Agreement rate between manually verified and automatically classified data sets.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="470"/>
              <col width="0"/>
              <col width="500"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">New variables generated</td>
                  <td>Correctly assigned categories, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Activities data set (n=6260)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Age</td>
                  <td colspan="2">4855 (77.55)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Gender</td>
                  <td colspan="2">6215 (99.28)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Vulnerable populations</td>
                  <td colspan="2">5525 (88.26)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Activity type<sup>a</sup></td>
                  <td colspan="2">3417 (62.02)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Format<sup>a</sup></td>
                  <td colspan="2">5326 (96.67)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Focus<sup>a</sup></td>
                  <td colspan="2">5342 (96.97)</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Resources data set (n=4912)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Age</td>
                  <td colspan="2">4029 (82.02)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Gender</td>
                  <td colspan="2">4843 (98.59)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Vulnerable populations</td>
                  <td colspan="2">3883 (79.05)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Resource type</td>
                  <td colspan="2">3845 (78.28)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Focus</td>
                  <td colspan="2">4886 (99.47)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>Automatic classification of the categories for variables <italic>activity type</italic>, <italic>format</italic>, and <italic>focus</italic> was performed only for activities targeting older adults (n=5509).</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Spatial Overlay Analysis</title>
          <p>Coordinates for the locations of 0.36% (18/5055) of activities and 2.41% (109/4530) of resources were not identified. Manual searches on Google Maps using addresses allowed us to locate the coordinates for all but 7 activities and 2 resources. Through spatial overlay analysis, intersections between spatial points and BHAs were not identified for 26 activities or 4 resources. The newly generated columns for the variables <italic>Code_BHA</italic> and <italic>Name_BHA</italic> encompassed values for 318 distinct BHAs, representing 85% of the 374 BHAs.</p>
        </sec>
      </sec>
      <sec>
        <title>Results From Data Set Filtering and Descriptive Analysis</title>
        <sec>
          <title>Filtering Health Assets With Potential to Enhance Older Adults’ Social Connections</title>
          <p>Using the newly generated contextual variables, we filtered the data sets of activities and resources to identify those with the potential to foster social connections among older adults. From the initial 17,305 health assets identified, we obtained 9546 eligible health assets, comprising 5022 activities and 4524 resources. The reasons for exclusion and the stages in which health assets were discarded are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Flowchart for the filtering of health asset data sets by contextual variables related to social connection constructs generated using the web scraping, text mining, and spatial overlay (WeTMS) method. BHA: basic health area.</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Characteristics of Eligible Health Assets</title>
          <p>Of the health assets registered as activities, 24.59% (1235/5022) specifically targeted older adults, whereas 75.41% (3787/5022) targeted broader age ranges, including the older population. Most resources targeted the general population and included older adults, with only 4.12% (207/5022) being exclusively for older adults, such as civic centers for retirees. Only 2.49% (238/9546) of the health assets had a sex-specific target; these were predominantly women (n=212). Among all health assets, 13.5% (678/5022) of activities and 7.98% (361/4524) of resources were tailored for specific vulnerable groups, with physical or mental illness being the primary focus.</p>
          <p>Group-oriented activities promoting social interactions accounted for 99.56% (5000/5022) of the eligible activities. However, only 4.36% (219/5022) explicitly used concepts related to social connections (eg, loneliness and social isolation) in titles and descriptions. Over 57% (2862/5022) of the activities were cost-free, and the most common activity duration was 1 to 3 months (975/5022, 19.41%). Data on format, duration, and cost are not available for resources.</p>
          <p>The analysis of the new variable <italic>activity type</italic> showed that leisure and skill development activities were most common (1844/5022, 36.72%). This included group handcrafts, dance, painting, theater, cooking, choir courses, and conversation groups that focused on shared-interest topics. Group exercise activities (eg, walking groups) accounted for 31.08% (1561/5022) of the activities. Over 22% (1103/5022) of the activities involved group activities with health and social professionals outside the health care center, including psychological therapies and health and social care. Finally, 8.46% (425/5022) were social facilitation activities such as group meetings to share common interests (eg, film forums). Overall, more than half of these activities were registered between 2021 and 2022 (<xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
          <p>A total of 61.49% (2782/4524) of the registered resources facilitated exchange of knowledge and interests among older adults. These resources included leisure and cultural associations; public libraries; civic centers; and cultural, sports, and educational institutions. Municipal natural and green spaces where adults can gather accounted for 17.28% (782/4524) of the resources. A total of 595 (13.1%) health institutions and 140 (3.1%) social welfare institutions were found, including primary care centers, health and social foundations, and advocacy institutions promoting social inclusion. Other resources linked to health and social welfare include patient advocacy groups, faith-based organizations, and charitable and voluntary organizations. In contrast to activities, most resources were registered before 2019 (<xref rid="figure5" ref-type="fig">Figure 5</xref>). A detailed descriptive analysis of the type, target population, focus, cost, format, and duration of health assets is included in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Number of activities with the potential to enhance older adults’ social connections by type and year of registration (2015-2022).</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Number of resources with potential to enhance older adults’ social connections, by type and year of registration (2015-2022).</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Overview of Registration Trends of Eligible Health Assets Across BHAs</title>
          <p>The first registry of a health asset on the Assets and Health websites occurred on July 23, 2015, and the last on December 23, 2022, the day when web scraping was conducted. Total registration of activities remained consistently low from the start of the program until early 2018, whereas for resources, a registration peak was observed in late 2016. Activity and resource registrations increased from 2018 to mid-2020. A decline in registration was observed from early 2020 to mid-2021, coinciding with the outbreak of the COVID-19 pandemic. Local polynomial regression fitting lines showed a growing pattern in the registration of activities from 2021 onwards, whereas resource registration remained low (<xref rid="figure6" ref-type="fig">Figure 6</xref>).</p>
          <p>On the basis of the observed trends, 4 implementation periods were defined to better understand registration trends: period 1, from July 2015 to January 2018; period 2, from February 2018 to February 2020; period 3, from March 2020 to May 2021; and period 4, from the end of June 2021 to December 2022. During the first two and a half years of the program, the average number of activities and resources registered per week across all BHAs was 0.37 and 3.47, respectively, increasing to 11.83 and 25.27 in period 2. During the COVID-19 pandemic in period 3, these figures decreased to an average of 3.19 activities and 20.64 resources per week. Period 4 had the highest registration rate for activities (38.52/wk).</p>
          <p>To calculate the registration trends in individual BHAs, we divided the number of BHAs with one or more activities registered by the total number of BHAs (n=374) for each period. We did not consider the resource data set because of the observed patterns suggesting centralized registration, rather than local registration. For instance, in late 2016, resources in 237 BHAs were registered in a single day. At the end of period 1, 8% (30/374) of the BHAs had one or more registered activities, which increased to 85% (318/374) by the end of period 4 (<xref rid="figure7" ref-type="fig">Figure 7</xref>; <xref ref-type="table" rid="table3">Table 3</xref>). The number of health assets registered per BHA varied significantly, ranging from 0 to 263 activities and 0 to 265 resources. The median number of health assets registered per BHA from 2015 to 2022 was 5 (IQR 13.75) for activities and 9 (IQR 10) for resources. <xref rid="figure8" ref-type="fig">Figure 8</xref> illustrates the geographic distribution of the activities and resources registered in each period.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Weekly registration trends of health assets from July 2015 to December 2022. The y-axis represents the number of registered activities and resources per week. The x-axis represents time, labeled by years for clarity.</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure7" position="float">
            <label>Figure 7</label>
            <caption>
              <p>Cumulative frequency of basic health areas (BHAs) with registered health assets aimed at enhancing older adults’ social connections from July 2015 to December 2022. Each point represents a BHA, plotted cumulatively at the time of its first asset registration on the Assets and Health platform.</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Average number of health assets registered per week and proportion of basic health areas (BHAs) with one or more activities registered per period.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="230"/>
              <col width="90"/>
              <col width="100"/>
              <col width="160"/>
              <col width="160"/>
              <col width="0"/>
              <col width="90"/>
              <col width="170"/>
              <thead>
                <tr valign="top">
                  <td>Time periods</td>
                  <td colspan="2">Number of health assets</td>
                  <td colspan="3">λ (95% CI)<sup>a</sup></td>
                  <td colspan="2">BHAs with registered activities</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Activities</td>
                  <td>Resources</td>
                  <td>Activities</td>
                  <td>Resources</td>
                  <td colspan="2">Values, n<sup>b</sup></td>
                  <td>Cumulative proportion of BHAs (%)<sup>c</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Period 1: July 2015 to January 2018</td>
                  <td>50</td>
                  <td>462</td>
                  <td>0.37 (0.27-0.48)</td>
                  <td>3.47 (3.16-3.79)</td>
                  <td colspan="2">30</td>
                  <td>8</td>
                </tr>
                <tr valign="top">
                  <td>Period 2: February 2018 to February 2020</td>
                  <td>1290</td>
                  <td>2755</td>
                  <td>11.83 (11.19-12.48)</td>
                  <td>25.27 (24.33-26.21)</td>
                  <td colspan="2">154</td>
                  <td>49.2</td>
                </tr>
                <tr valign="top">
                  <td>Period 3: March 2020 to May 2021</td>
                  <td>562</td>
                  <td>301</td>
                  <td>8.64 (7.93-9.36)</td>
                  <td>4.63 (4.11-5.15)</td>
                  <td colspan="2">28</td>
                  <td>56.6</td>
                </tr>
                <tr valign="top">
                  <td>Period 4: June 2021 to December 2022</td>
                  <td>3120</td>
                  <td>941</td>
                  <td>38.52 (37.16-39.87)</td>
                  <td>11.62 (10.87-12.36)</td>
                  <td colspan="2">106</td>
                  <td>85</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>λ denotes the average number of health assets registered per week in each period.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>Unique BHAs that registered activities targeting social connections in older adults for the first time in the specified period.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup>Cumulative percentage of all BHAs (n=374) that had registered such activities by the end of each period, including BHAs that first registered in earlier periods.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <fig id="figure8" position="float">
            <label>Figure 8</label>
            <caption>
              <p>Geographic distribution of activities and resources with potential to enhance older adults’ social connections across basic health areas.</p>
            </caption>
            <graphic xlink:href="publichealth_v10i1e50379_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Innovation: Generating Area-Specific Contextual Variables From Unstructured Web-Based Data</title>
        <p>We introduce a novel approach for generating area-specific contextual variables from unstructured website data using WeTMS. By combining the methods commonly used in computer and data science, we were able to efficiently gather and transform large amounts of website data into comprehensive data sets of theoretically informed variables. The resulting data sets enabled us to identify and characterize health assets with the potential to enhance social connections among older adults registered within health jurisdictional areas from 2015 to 2022. In addition, this approach allowed us to examine area-specific registration trends for health assets, showing the use of the Assets and Health platform developed as part of a public health strategy in Catalonia. We provided detailed explanations of concepts, steps, and the code used, and included supplementary information to facilitate the replication of the steps, attempting to familiarize novice readers with these techniques.</p>
      </sec>
      <sec>
        <title>Applications of the WeTMS Method</title>
        <p>Our method provides a tool for researchers interested in developing new contextual variables when data are scarce or difficult to obtain using traditional means. Researchers in fields such as public health, nursing, and social epidemiology who study the impact of emerging health and social phenomena on health outcomes and determinants of health can benefit from this method. A practical example involving an emerging social determinant of health, such as precarious employment [<xref ref-type="bibr" rid="ref63">63</xref>], could consist of applying the web-scraping steps to obtain website data from employment portals, text mining to analyze posts and identify precarious job offers, and spatial overlay analysis to locate them in geographic areas, and then studying the effect on population outcomes using multilevel modeling. Researchers and program evaluators in health services research and implementation science can use this method to obtain data to conduct descriptive analyses explaining policy adoption within jurisdictional or geographic areas, following the research case outlined in this study.</p>
        <p>A key feature of this method is that its steps can be implemented in sequence or independently, depending on the research goals. For example, researchers interested in generating new variables from text data without locating them in specific geographic areas can follow steps 1 to 6, which involve web scraping, text processing, and mining. If a data set is already available and researchers want to group the data by geographic settings, they can follow steps 7 and 8, which involve spatial overlay analysis.</p>
      </sec>
      <sec>
        <title>Challenges and Limitations</title>
        <p>There are challenges with this method that can limit its feasibility and application. In our example, extracting comparable data from multiple URLs was feasible because the websites associated with the Assets and Health platforms had similar HTML structures. The consistent placement of targeted information across websites simplifies the complexity of the web-scraping program. Thus, the attached web scraper code is only suitable for single or multiple websites with a limited number of distinct HTML structures (eg, forums, social media, and employment portals). Studies in which the target data are spread over different websites with varied designs require more advanced programming [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>A key step in the process—the creation of topic-specific dictionaries for the classification of observations—necessitates a deep understanding of the field and the terminology used in the data. Overall, the rule-based classifier demonstrated high accuracy. However, some variables, such as <italic>activity type</italic>, showed a higher rate of errors, in part because the dictionaries used to classify them contained only high-frequency words found in the titles and descriptions. Thus, our experience suggests that manual verification of new variables and categories by researchers with a comprehensive understanding of the data and subject matter is essential to ensure data validity before statistical analysis. However, this can be unfeasible for large data sets. In such scenarios, the impracticality of manual verification may necessitate the use of complex machine learning classifiers, presenting a trade-off in the confidence of the data that potentially compromises the robustness of the resulting variables [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
        <p>Spatial overlay analysis effectively localizes health assets to their respective health jurisdictions, facilitated by the acquisition of complete addresses during the web scraping phase and the availability of a high-quality polygon map for analysis. Geospatial maps can be obtained from government agencies, nonprofit organizations, and commercial providers. If maps are unavailable, they can be created using accessible satellite imagery [<xref ref-type="bibr" rid="ref64">64</xref>]. However, the necessity for location-specific data (eg, addresses, postal codes, and cities) for each observation to generate contextual-level variables limits the range of suitable data sources available to researchers.</p>
        <p>Ethical and data protection considerations are important. Web scraping is typically permitted when data are publicly accessible and not subject to international legislation concerning personal data, trademarks, copyrights, or private information [<xref ref-type="bibr" rid="ref30">30</xref>]. Automatic extraction of internet data might be unfeasible if the data are not publicly available or if a website’s terms of service restrict automated collection and analysis [<xref ref-type="bibr" rid="ref65">65</xref>]. Researchers may consult ethics bodies to ensure that the methodology adheres to ethical standards when dealing with sensitive topics and personal information, even when relying on publicly available sources.</p>
        <p>In addition to these challenges, the method and data sets that it produces have limitations. The complexity of these steps requires introductory technical knowledge. Thus, we have provided detailed explanations and supplementary information that can support researchers, as they familiarize themselves with the steps. We anticipate that the compendium of concepts, code, software packages, and references gathered from trustworthy sources will serve as a resource for those interested in these techniques.</p>
        <p>Another limitation is the bias associated with the classifier system. The development of classification systems inherently relies on the subjective judgment of researchers. This can result in misclassifications, particularly those related to assumptions about race, gender, or social exclusion factors, especially within machine learning classifiers [<xref ref-type="bibr" rid="ref66">66</xref>]. It is advisable for researchers to engage in a reflexive process, carefully considering their assumptions in the definition and selection of dictionary words and to critically evaluate how these decisions may influence the investigation [<xref ref-type="bibr" rid="ref67">67</xref>].</p>
        <p>Finally, although the data sets generated are robust for descriptive analysis, researchers should proceed with clearly defined assumptions when using new context-level variables in statistical analyses, particularly in multilevel modeling or ecologic studies that aim to draw inferences. Although we successfully compiled 2 data sets of health assets from targeted websites, their comprehensiveness and accuracy in reflecting all identified assets across BHAs remain unknown. It is possible that some local organizations in BHAs were more or less likely to register assets on Assets and Health websites, influenced by context-specific factors such as management support and training on the platform [<xref ref-type="bibr" rid="ref68">68</xref>]. If feasible and ethical, it would be advisable for researchers to triangulate data to validate the data sets, thus verifying web-scraped data with secondary sources or direct inputs from stakeholders [<xref ref-type="bibr" rid="ref69">69</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The sequential use of WeTMS enabled the efficient creation of data sets of health assets registered with the Assets and Health websites in Catalonia, Spain, which aimed to enhance the social connections of older adults in local health jurisdictions. Our descriptive analysis demonstrated the usefulness of the data sets in exploring the characteristics of contextual variables, as well as in understanding temporal patterns and spatial distributions.</p>
        <p>Contextual-level variables generated via WeTMS may also be used in hierarchical analyses to evaluate the impact of contextual factors on health outcomes when more robust sources, such as census data, are not available. Adherence to data protection standards and ethical considerations should also guide this process. Although WeTMS has potential value for multiple research disciplines, it presents challenges and limitations, including the need for internet data sources to have comparable structures, a dependence on location data, the potential lack of representativeness in website content, the requirement for technical expertise, and a significant time investment for manual verification.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional details in step 2: identifying hidden URLs, finding HTML elements, and further references.</p>
        <media xlink:href="publichealth_v10i1e50379_app1.docx" xlink:title="DOCX File , 733 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Detailed explanation of Python libraries and web scraper code.</p>
        <media xlink:href="publichealth_v10i1e50379_app2.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Definitions of new variables and categories for text classification.</p>
        <media xlink:href="publichealth_v10i1e50379_app3.docx" xlink:title="DOCX File , 27 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Extended descriptive analysis of the type, target population, focus, cost, format, and duration of health assets.</p>
        <media xlink:href="publichealth_v10i1e50379_app4.docx" xlink:title="DOCX File , 21 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BHA</term>
          <def>
            <p>basic health area</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CRS</term>
          <def>
            <p>coordinate reference system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SDOH</term>
          <def>
            <p>social determinants of health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">WeTMS</term>
          <def>
            <p>web scraping, text mining, and spatial overlay analysis</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank Angeli Chacaliaza for his valuable contribution as a second reviewer in the manual verification stage and Ann Tourangeau, Martine Puts, and Tina Behrouzi for their comments and suggestions on the first draft. The authors also acknowledge Charles Anderson for his technical assistance. This study was not supported by any funding. The authors attest that there was no use of generative artificial intelligence technology in the generation of text, figures, or other informational content in this manuscript.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed during this study are available from the corresponding author upon reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>PG-H and CM conceptualized the study; PG-H wrote the study protocol, developed the method, and drafted the manuscript; and AG-V, LG-P, and CM provided expert input and conducted manuscript review and editing. All the authors have read and agreed to the published version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Diez-Roux</surname>
              <given-names>AV</given-names>
            </name>
          </person-group>
          <article-title>Bringing context back into epidemiology: variables and fallacies in multilevel analysis</article-title>
          <source>Am J Public Health</source>
          <year>1998</year>
          <month>02</month>
          <volume>88</volume>
          <issue>2</issue>
          <fpage>216</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.88.2.216</pub-id>
          <pub-id pub-id-type="medline">9491010</pub-id>
          <pub-id pub-id-type="pmcid">PMC1508189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sampson</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Raudenbush</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Earls</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Neighborhoods and violent crime: a multilevel study of collective efficacy</article-title>
          <source>Science</source>
          <year>1997</year>
          <month>08</month>
          <day>15</day>
          <volume>277</volume>
          <issue>5328</issue>
          <fpage>918</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1126/science.277.5328.918</pub-id>
          <pub-id pub-id-type="medline">9252316</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Lochner</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Kawachi</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Neighborhood differences in social capital: a compositional artifact or a contextual construct?</article-title>
          <source>Health Place</source>
          <year>2003</year>
          <month>03</month>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>33</fpage>
          <lpage>44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pubmed.ncbi.nlm.nih.gov/12609471/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/s1353-8292(02)00028-x</pub-id>
          <pub-id pub-id-type="medline">12609471</pub-id>
          <pub-id pub-id-type="pii">S135382920200028X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van den Berg</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Maas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Verheij</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Groenewegen</surname>
              <given-names>PP</given-names>
            </name>
          </person-group>
          <article-title>Green space as a buffer between stressful life events and health</article-title>
          <source>Soc Sci Med</source>
          <year>2010</year>
          <month>04</month>
          <volume>70</volume>
          <issue>8</issue>
          <fpage>1203</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2010.01.002</pub-id>
          <pub-id pub-id-type="medline">20163905</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(10)00067-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hox</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Moerbeek</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van de Schoot</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Multilevel Analysis: Techniques and Applications, Third Edition</source>
          <year>2017</year>
          <publisher-loc>Milton Park, UK</publisher-loc>
          <publisher-name>Taylor &amp; Francis</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Craig</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gunnell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lawson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Macintyre</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ogilvie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Petticrew</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reeves</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using natural experiments to evaluate population health interventions: new Medical Research Council guidance</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2012</year>
          <month>12</month>
          <day>10</day>
          <volume>66</volume>
          <issue>12</issue>
          <fpage>1182</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22577181"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jech-2011-200375</pub-id>
          <pub-id pub-id-type="medline">22577181</pub-id>
          <pub-id pub-id-type="pii">jech-2011-200375</pub-id>
          <pub-id pub-id-type="pmcid">PMC3796763</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <article-title>Inter-ministerial Public Health Plan (PINSAP)</article-title>
          <source>Agència de Salut Pública de Catalunya</source>
          <year>2014</year>
          <month>02</month>
          <day>14</day>
          <access-date>2022-12-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://salutpublica.gencat.cat/web/.content/minisite/aspcat/sobre_lagencia/pinsap/continguts_antics/pinsap-en.pdf">https://salutpublica.gencat.cat/web/.content/minisite/aspcat/sobre_lagencia/pinsap/continguts_antics/pinsap-en.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Crane</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bohn-Goldbaum</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Grunseit</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bauman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Using natural experiments to improve public health evidence: a review of context and utility for obesity prevention</article-title>
          <source>Health Res Policy Syst</source>
          <year>2020</year>
          <month>05</month>
          <day>18</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>48</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://health-policy-systems.biomedcentral.com/articles/10.1186/s12961-020-00564-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12961-020-00564-2</pub-id>
          <pub-id pub-id-type="medline">32423438</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12961-020-00564-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7236508</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leatherdale</surname>
              <given-names>ST</given-names>
            </name>
          </person-group>
          <article-title>Natural experiment methodology for research: a review of how different methods can support real-world research</article-title>
          <source>Int J Soc Res Methodol</source>
          <year>2018</year>
          <month>07</month>
          <day>02</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1080/13645579.2018.1488449</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Diouf</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sarr</surname>
              <given-names>EN</given-names>
            </name>
            <name name-style="western">
              <surname>Sall</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Birregah</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bousso</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mbaye</surname>
              <given-names>SN</given-names>
            </name>
          </person-group>
          <article-title>Web scraping: state-of-the-art and areas of application</article-title>
          <source>Proceedings of the IEEE International Conference on Big Data (Big Data)</source>
          <year>2019</year>
          <conf-name>IEEE International Conference on Big Data (Big Data)</conf-name>
          <conf-date>December 09-12, 2019</conf-date>
          <conf-loc>Los Angeles, CA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/9005594/authors#authors"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/bigdata47090.2019.9005594</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>vanden Broucke</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Baesens</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Practical Web Scraping for Data Science: Best Practices and Examples with Python</source>
          <year>2018</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Apress</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>03</month>
          <day>27</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Purushothaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Nali</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bardier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Big data, natural language processing, and deep learning to detect and characterize illicit COVID-19 product sales: infoveillance study on Twitter and Instagram</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>08</month>
          <day>25</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>e20794</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/3/e20794/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20794</pub-id>
          <pub-id pub-id-type="medline">32750006</pub-id>
          <pub-id pub-id-type="pii">v6i3e20794</pub-id>
          <pub-id pub-id-type="pmcid">PMC7451110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
          </person-group>
          <article-title>A machine learning approach for the detection and characterization of illicit drug dealers on Instagram: model evaluation study</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>06</month>
          <day>15</day>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>e13803</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/6/e13803/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13803</pub-id>
          <pub-id pub-id-type="medline">31199298</pub-id>
          <pub-id pub-id-type="pii">v21i6e13803</pub-id>
          <pub-id pub-id-type="pmcid">PMC6598421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Michalski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Knauth</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Kaspar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Reiter</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Large-scale web scraping for problem gambling research: a case study of COVID-19 lockdown effects in Germany</article-title>
          <source>J Gambl Stud</source>
          <year>2023</year>
          <month>09</month>
          <day>27</day>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>1487</fpage>
          <lpage>504</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36707481"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10899-023-10187-1</pub-id>
          <pub-id pub-id-type="medline">36707481</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10899-023-10187-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC9882744</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gregory</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Piff</surname>
              <given-names>PK</given-names>
            </name>
          </person-group>
          <article-title>Finding uncommon ground: extremist online forum engagement predicts integrative complexity</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <month>01</month>
          <day>19</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>e0245651</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0245651"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0245651</pub-id>
          <pub-id pub-id-type="medline">33465152</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-10891</pub-id>
          <pub-id pub-id-type="pmcid">PMC7815119</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kogan</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Bolon</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alcoba</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez-Marquez</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Mohanty</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Ruiz de Castañeda</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Wet markets and food safety: TripAdvisor for improved global digital surveillance</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2019</year>
          <month>04</month>
          <day>01</day>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>e11477</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2019/2/e11477/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11477</pub-id>
          <pub-id pub-id-type="medline">30932867</pub-id>
          <pub-id pub-id-type="pii">v5i2e11477</pub-id>
          <pub-id pub-id-type="pmcid">PMC6462893</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Oliveira</surname>
              <given-names>DV</given-names>
            </name>
            <name name-style="western">
              <surname>Albuquerque</surname>
              <given-names>UP</given-names>
            </name>
          </person-group>
          <article-title>Cultural evolution and digital media: diffusion of fake news about COVID-19 on Twitter</article-title>
          <source>SN Comput Sci</source>
          <year>2021</year>
          <month>08</month>
          <day>28</day>
          <volume>2</volume>
          <issue>6</issue>
          <fpage>430</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34485922"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s42979-021-00836-w</pub-id>
          <pub-id pub-id-type="medline">34485922</pub-id>
          <pub-id pub-id-type="pii">836</pub-id>
          <pub-id pub-id-type="pmcid">PMC8397611</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gaikwad</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Chaugule</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Patil</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Text mining methods and techniques</article-title>
          <source>Int J Comput Appl</source>
          <year>2014</year>
          <month>01</month>
          <day>16</day>
          <volume>85</volume>
          <issue>17</issue>
          <fpage>42</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.5120/14937-3507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zunic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Corcoran</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis in health and well-being: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>01</month>
          <day>28</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e16023</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/1/e16023/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16023</pub-id>
          <pub-id pub-id-type="medline">32012057</pub-id>
          <pub-id pub-id-type="pii">v8i1e16023</pub-id>
          <pub-id pub-id-type="pmcid">PMC7013658</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gruebner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Sykora</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lowe</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Shankardass</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Trinquart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Galea</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mental health surveillance after the terrorist attacks in Paris</article-title>
          <source>Lancet</source>
          <year>2016</year>
          <month>05</month>
          <volume>387</volume>
          <issue>10034</issue>
          <fpage>2195</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(16)30602-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boon-Itt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Skunkan</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Public perception of the COVID-19 pandemic on Twitter: sentiment analysis and topic modeling study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>11</month>
          <day>11</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e21978</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/4/e21978/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21978</pub-id>
          <pub-id pub-id-type="medline">33108310</pub-id>
          <pub-id pub-id-type="pii">v6i4e21978</pub-id>
          <pub-id pub-id-type="pmcid">PMC7661106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Discovering health topics in social media using topic models</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>08</month>
          <day>01</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>e103408</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0103408"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0103408</pub-id>
          <pub-id pub-id-type="medline">25084530</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-00554</pub-id>
          <pub-id pub-id-type="pmcid">PMC4118877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KT</given-names>
            </name>
          </person-group>
          <source>Introduction to Geographic Information Systems</source>
          <year>2006</year>
          <publisher-loc>Chicago, IL</publisher-loc>
          <publisher-name>McGraw-Hill Higher Education</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shankardass</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jerrett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Milam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berhane</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>McConnell</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Social environment and asthma: associations with crime and No Child Left Behind programmes</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2011</year>
          <month>10</month>
          <day>11</day>
          <volume>65</volume>
          <issue>10</issue>
          <fpage>859</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21071562"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jech.2009.102806</pub-id>
          <pub-id pub-id-type="medline">21071562</pub-id>
          <pub-id pub-id-type="pii">jech.2009.102806</pub-id>
          <pub-id pub-id-type="pmcid">PMC4384703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hornby-Turner</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Peel</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Hubbard</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Health assets in older age: a systematic review</article-title>
          <source>BMJ Open</source>
          <year>2017</year>
          <month>05</month>
          <day>17</day>
          <volume>7</volume>
          <issue>5</issue>
          <fpage>e013226</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&amp;pmid=28515182"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2016-013226</pub-id>
          <pub-id pub-id-type="medline">28515182</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2016-013226</pub-id>
          <pub-id pub-id-type="pmcid">PMC5777471</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>Finder of assets and health</article-title>
          <source>Public Health Agency of Catalonia (ASPCAT)</source>
          <access-date>2022-12-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://salutpublica.gencat.cat/ca/sobre_lagencia/Plans-estrategics/pinsap/Accions-eines-i-projectes-relacionats/actius-i-salut/cercador-dactius-i-salut/index.html#googtrans(ca|en)">https://salutpublica.gencat.cat/ca/sobre_lagencia/Plans-estrategics/pinsap/Accions-eines-i-projectes-relacionats/actius-i-salut/cercador-dactius-i-salut/index.html#googtrans(ca|en)</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sáinz-Ruiz</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Sanz-Valero</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gea-Caballero</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Melo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Suárez-Máximo</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez-Riera</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Dimensions of community assets for health. A systematised review and meta-synthesis</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2021</year>
          <month>05</month>
          <day>27</day>
          <volume>18</volume>
          <issue>11</issue>
          <fpage>5758</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph18115758"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph18115758</pub-id>
          <pub-id pub-id-type="medline">34072002</pub-id>
          <pub-id pub-id-type="pii">ijerph18115758</pub-id>
          <pub-id pub-id-type="pmcid">PMC8198194</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliver-Parra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>González-Viana</surname>
              <given-names>A</given-names>
            </name>
            <collab>Grupo de Trabajo de Indicadores Básicos de Salud por Área Básica (GT-IBS)</collab>
          </person-group>
          <article-title>[Facilitating community oriented primary health care. Basic health indicators by small areas in Catalonia]</article-title>
          <source>Gac Sanit</source>
          <year>2020</year>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>204</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.elsevier.es/en/linksolver/ft/pii/S0213-9111(19)30157-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.gaceta.2019.05.012</pub-id>
          <pub-id pub-id-type="medline">31488325</pub-id>
          <pub-id pub-id-type="pii">S0213-9111(19)30157-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Web Scraping with Python: Collecting More Data from the Modern Web. 2nd edition</source>
          <year>2018</year>
          <month>4</month>
          <day>4</day>
          <publisher-loc>Sebastopol, CA</publisher-loc>
          <publisher-name>O'Reilly Media</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Munzert</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rubba</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Meißner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nyhuis</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Automated Data Collection with R: A Practical Guide to Web Scraping and Text Mining</source>
          <year>2015</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &amp; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>HTML Elements</article-title>
          <source>W3 Schools</source>
          <year>2023</year>
          <access-date>2023-12-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.w3schools.com/html/html_elements.asp">https://www.w3schools.com/html/html_elements.asp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>HTML Tags</article-title>
          <source>W3 Schools</source>
          <year>2023</year>
          <access-date>2023-12-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.w3schools.com/tags/">https://www.w3schools.com/tags/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <source>YouTube</source>
          <year>2021</year>
          <month>1</month>
          <day>23</day>
          <access-date>2023-12-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.youtube.com/watch?v=XsL8JDkH-ec">https://www.youtube.com/watch?v=XsL8JDkH-ec</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Web Scraping Basics</article-title>
          <source>Towards Data Science</source>
          <year>2020</year>
          <month>7</month>
          <day>15</day>
          <access-date>2023-12-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://towardsdatascience.com/web-scraping-basics-82f8b5acd45c">https://towardsdatascience.com/web-scraping-basics-82f8b5acd45c</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shafer</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Python Tutorial: Web Scraping with BeautifulSoup and Requests</article-title>
          <source>YouTube</source>
          <year>2017</year>
          <month>11</month>
          <day>18</day>
          <access-date>2023-12-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.youtube.com/watch?v=ng2o98k983k">https://www.youtube.com/watch?v=ng2o98k983k</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Python homepage</article-title>
          <source>Python</source>
          <access-date>2023-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.python.org/">https://www.python.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reitz</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Requests documentation: release 2.31.0</article-title>
          <source>Build Media</source>
          <year>2023</year>
          <month>8</month>
          <day>18</day>
          <access-date>2023-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://buildmedia.readthedocs.org/media/pdf/requests/latest/requests.pdf">https://buildmedia.readthedocs.org/media/pdf/requests/latest/requests.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Beautiful Soup 4.12.0 documentation</article-title>
          <source>Beautiful Soup</source>
          <access-date>2022-12-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">https://www.crummy.com/software/BeautifulSoup/bs4/doc/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <article-title>User guide - pandas 2.1.4 documentation</article-title>
          <source>Pandas</source>
          <access-date>2022-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pandas.pydata.org/docs/user_guide/index.html#user-guide">https://pandas.pydata.org/docs/user_guide/index.html#user-guide</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>Targeted web scraping implementation evaluation</article-title>
          <source>GitHub</source>
          <access-date>2023-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/paugalvez/Targeted_webscraping_implementation_evaluation.git">https://github.com/paugalvez/Targeted_webscraping_implementation_evaluation.git</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feinerer</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Introduction to the tm Package: Text Mining in R</article-title>
          <source>The Comprehensive R Archive Network</source>
          <year>2023</year>
          <month>2</month>
          <day>5</day>
          <access-date>2023-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/tm/vignettes/tm.pdf">https://cran.r-project.org/web/packages/tm/vignettes/tm.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rinker</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Goodrich</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kurkiewicz</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>qdap: bridging the gap between qualitative data and quantitative analysis</article-title>
          <source>The Comprehensive R Archive Network</source>
          <year>2023</year>
          <month>5</month>
          <day>11</day>
          <access-date>2023-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://CRAN.R-project.org/package=qdap">https://CRAN.R-project.org/package=qdap</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vijayarani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Janani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Text mining: open source tokenization tools – an analysis</article-title>
          <source>Adv Comput Intell Int J</source>
          <year>2016</year>
          <month>01</month>
          <day>30</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.5121/acii.2016.3104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <article-title>Text analysis spatial overlay analysis R</article-title>
          <source>GitHub</source>
          <access-date>2023-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/paugalvez/Text_Analysis_Spatial_Overlay_Analysis_R">https://github.com/paugalvez/Text_Analysis_Spatial_Overlay_Analysis_R</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gardiner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Geldenhuys</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gott</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Interventions to reduce social isolation and loneliness among older people: an integrative review</article-title>
          <source>Health Soc Care Community</source>
          <year>2018</year>
          <month>03</month>
          <day>13</day>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>147</fpage>
          <lpage>57</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eprints.whiterose.ac.uk/102610/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/hsc.12367</pub-id>
          <pub-id pub-id-type="medline">27413007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Galvez-Hernandez</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>González-de Paz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Muntaner</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Primary care-based interventions addressing social isolation and loneliness in older people: a scoping review</article-title>
          <source>BMJ Open</source>
          <year>2022</year>
          <month>02</month>
          <day>04</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>e057729</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&amp;pmid=35121608"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2021-057729</pub-id>
          <pub-id pub-id-type="medline">35121608</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2021-057729</pub-id>
          <pub-id pub-id-type="pmcid">PMC8819903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freedman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nicolle</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Social isolation and loneliness: the new geriatric giants: approach for primary care</article-title>
          <source>Can Fam Physician</source>
          <year>2020</year>
          <month>03</month>
          <volume>66</volume>
          <issue>3</issue>
          <fpage>176</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cfp.ca/cgi/pmidlookup?view=long&amp;pmid=32165464"/>
          </comment>
          <pub-id pub-id-type="medline">32165464</pub-id>
          <pub-id pub-id-type="pii">66/3/176</pub-id>
          <pub-id pub-id-type="pmcid">PMC8302356</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>National Academies of Sciences, Engineering, and Medicine</collab>
            <collab>Division of Behavioral and Social Sciences and Education</collab>
            <collab>Health and Medicine Division</collab>
            <collab>Board on Behavioral, Cognitive, and Sensory Sciences</collab>
            <collab>Board on Health Sciences Policy</collab>
            <collab>Committee on the Health and Medical Dimensions of Social Isolation and Loneliness in Older Adults</collab>
          </person-group>
          <source>Social Isolation and Loneliness in Older Adults: Opportunities for the Health Care System</source>
          <year>2020</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <article-title>World population ageing 2015</article-title>
          <source>United Nations, Department of Economic and Social Affairs, Population Division</source>
          <year>2015</year>
          <access-date>2023-01-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.un.org/en/development/desa/population/publications/pdf/ageing/WPA2015_Report.pdf">https://www.un.org/en/development/desa/population/publications/pdf/ageing/WPA2015_Report.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <article-title>Classification of gender</article-title>
          <source>Statistics Canada</source>
          <access-date>2023-01-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www23.statcan.gc.ca/imdb/p3VD.pl?Function=getVD&amp;TVD=1326727&amp;CVD=1326727&amp;CLV=0&amp;MLV=1&amp;D=1">https://www23.statcan.gc.ca/imdb/p3VD.pl?Function=getVD&amp;TVD=1326727&amp;CVD=1326727&amp;CLV=0&amp;MLV=1&amp;D=1</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vijayan</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Bindu</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Parameswaran</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A comprehensive study of text classification algorithms</article-title>
          <source>Proceedings of the International Conference on Advances in Computing, Communications and Informatics (ICACCI)</source>
          <year>2017</year>
          <conf-name>International Conference on Advances in Computing, Communications and Informatics (ICACCI)</conf-name>
          <conf-date>September 13-16, 2017</conf-date>
          <conf-loc>Udupi, India</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/8125990"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/icacci.2017.8125990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radovanović</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ivanović</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Text mining: approaches and applications</article-title>
          <source>Novi Sad J Math</source>
          <year>2008</year>
          <volume>38</volume>
          <issue>3</issue>
          <fpage>227</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://sunsite.icm.edu.pl/packages/EMIS/journals/NSJOM/Papers/38_3/NSJOM_38_3_227_234.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Puschmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Haim</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Topic-specific dictionaries</article-title>
          <source>Automated Content Analysis with R</source>
          <access-date>2023-01-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://content-analysis-with-r.com/4-dictionaries.html">https://content-analysis-with-r.com/4-dictionaries.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Agreement, the f-measure, and reliability in information retrieval</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2005</year>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>296</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/15684123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M1733</pub-id>
          <pub-id pub-id-type="medline">15684123</pub-id>
          <pub-id pub-id-type="pii">M1733</pub-id>
          <pub-id pub-id-type="pmcid">PMC1090460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pebesma</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bivand</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>Classes and methods for spatial data in R</article-title>
          <source>R News</source>
          <year>2005</year>
          <month>11</month>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>9</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://geobgu.xyz/r-2019/resources/Rnews_2005-2.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pebesma</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Simple features for R: standardized support for spatial vector data</article-title>
          <source>R J</source>
          <year>2018</year>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>439</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.32614/rj-2018-009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bivand</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Keitt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rowlingson</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>rgdal: bindings for the 'geospatial' data abstraction library</article-title>
          <source>The Comprehensive R Archive Network</source>
          <year>2017</year>
          <month>11</month>
          <day>21</day>
          <access-date>2023-01-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://cran.nexr.com/web/packages/rgdal/index.html">http://cran.nexr.com/web/packages/rgdal/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bivand</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rundel</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>rgeos: interface to geometry engine - open source ('GEOS')</article-title>
          <source>rgeos</source>
          <year>2023</year>
          <access-date>2023-01-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://rgeos.r-forge.r-project.org/">https://rgeos.r-forge.r-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wickham</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>ggplot2: Elegant Graphics for Data Analysis</source>
          <year>2009</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bivand</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Pebesma</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez-Rubio</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>Applied Spatial Data Analysis with R</source>
          <year>2013</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jacoby</surname>
              <given-names>WG</given-names>
            </name>
          </person-group>
          <article-title>Loess: a nonparametric, graphical tool for depicting relationships between variables</article-title>
          <source>Elect Stud</source>
          <year>2000</year>
          <month>12</month>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>577</fpage>
          <lpage>613</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S0261-3794(99)00028-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0261-3794(99)00028-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benach</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vives</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Amable</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vanroelen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tarafa</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Muntaner</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Precarious employment: understanding an emerging social determinant of health</article-title>
          <source>Annu Rev Public Health</source>
          <year>2014</year>
          <volume>35</volume>
          <fpage>229</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-032013-182500</pub-id>
          <pub-id pub-id-type="medline">24641559</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lozano-Fuentes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Elizondo-Quiroga</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Farfan-Ale</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Loroño-Pino</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Rejon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez-Carro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lira-Zumbardo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Najera-Vazquez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez-Salas</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Calderon-Martinez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dominguez-Galera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mis-Avila</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Beaty</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Eisen</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Use of Google Earth to strengthen public health capacity and facilitate management of vector-borne diseases in resource-poor environments</article-title>
          <source>Bull World Health Organ</source>
          <year>2008</year>
          <month>09</month>
          <day>01</day>
          <volume>86</volume>
          <issue>9</issue>
          <fpage>718</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18797648"/>
          </comment>
          <pub-id pub-id-type="doi">10.2471/blt.07.045880</pub-id>
          <pub-id pub-id-type="medline">18797648</pub-id>
          <pub-id pub-id-type="pii">S0042-96862008000900016</pub-id>
          <pub-id pub-id-type="pmcid">PMC2649496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krotov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Tutorial: legality and ethics of web scraping</article-title>
          <source>Commun Assoc Inf Syst</source>
          <year>2020</year>
          <month>12</month>
          <volume>47</volume>
          <fpage>539</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aisel.aisnet.org/cgi/viewcontent.cgi?article=4240&amp;context=cais"/>
          </comment>
          <pub-id pub-id-type="doi">10.17705/1CAIS.04724</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sorensen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Thain</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Vasserman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Measuring and mitigating unintended bias in text classification</article-title>
          <source>Proceedings of the 2018 AAAI/ACM Conference on AI, Ethics, and Society</source>
          <year>2018</year>
          <conf-name>AIES '18</conf-name>
          <conf-date>February 2-3, 2018</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3278721.3278729"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3278721.3278729</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Watt</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>On becoming a qualitative researcher: the value of reflexivity</article-title>
          <source>Qual Report</source>
          <year>2007</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>82</fpage>
          <lpage>101</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.46743/2160-3715/2007.1645"/>
          </comment>
          <pub-id pub-id-type="doi">10.46743/2160-3715/2007.1645</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nilsen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bernhardsson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Context matters in implementation science: a scoping review of determinant frameworks that describe contextual determinants for implementation outcomes</article-title>
          <source>BMC Health Serv Res</source>
          <year>2019</year>
          <month>03</month>
          <day>25</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>189</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-019-4015-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-019-4015-3</pub-id>
          <pub-id pub-id-type="medline">30909897</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-019-4015-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6432749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boegershausen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Datta</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Borah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stephen</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Fields of gold: scraping web data for marketing insights</article-title>
          <source>J Mark</source>
          <year>2022</year>
          <month>08</month>
          <day>02</day>
          <volume>86</volume>
          <issue>5</issue>
          <fpage>1</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1177/00222429221100750</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
