<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i2e14952</article-id>
      <article-id pub-id-type="pmid">32234706</article-id>
      <article-id pub-id-type="doi">10.2196/14952</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Classification of Health-Related Social Media Posts: Evaluation of Post Content–Classifier Models and Analysis of User Demographics</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Davoudi</surname>
            <given-names>Anis</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Allem</surname>
            <given-names>Jon-Patrick</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Rivas</surname>
            <given-names>Ryan</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science and Engineering</institution>
            <institution>University of California, Riverside</institution>
            <addr-line>363 Winston Chung Hall</addr-line>
            <addr-line>900 University Ave</addr-line>
            <addr-line>Riverside, CA, 92521</addr-line>
            <country>United States</country>
            <phone>1 951 827 2838</phone>
            <email>rriva002@ucr.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5590-0274</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Sadah</surname>
            <given-names>Shouq A</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5203-4355</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Yuhang</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0495-0754</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hristidis</surname>
            <given-names>Vagelis</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8679-4988</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science and Engineering</institution>
        <institution>University of California, Riverside</institution>
        <addr-line>Riverside, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ryan Rivas <email>rriva002@ucr.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Apr-Jun</season>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>1</day>
        <month>4</month>
        <year>2020</year>
      </pub-date>
      <volume>6</volume>
      <issue>2</issue>
      <elocation-id>e14952</elocation-id>
      <history>
        <date date-type="received">
          <day>5</day>
          <month>6</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>6</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>8</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>1</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Ryan Rivas, Shouq A Sadah, Yuhang Guo, Vagelis Hristidis. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 01.04.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://publichealth.jmir.org/2020/2/e14952" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The increasing volume of health-related social media activity, where users connect, collaborate, and engage, has increased the significance of analyzing how people use health-related social media.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to classify the content (eg, posts that share experiences and seek support) of users who write health-related social media posts and study the effect of user demographics on post content.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We analyzed two different types of health-related social media: (1) health-related online forums—WebMD and DailyStrength—and (2) general online social networks—Twitter and Google+. We identified several categories of post content and built classifiers to automatically detect these categories. These classifiers were used to study the distribution of categories for various demographic groups.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We achieved an accuracy of at least 84% and a balanced accuracy of at least 0.81 for half of the post content categories in our experiments. In addition, 70.04% (4741/6769) of posts by male WebMD users asked for advice, and male users’ WebMD posts were more likely to ask for medical advice than female users’ posts. The majority of posts on DailyStrength shared experiences, regardless of the gender, age group, or location of their authors. Furthermore, health-related posts on Twitter and Google+ were used to share experiences less frequently than posts on WebMD and DailyStrength.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We studied and analyzed the content of health-related social media posts. Our results can guide health advocates and researchers to better target patient populations based on the application type. Given a research question or an outreach goal, our results can be used to choose the best online forums to answer the question or disseminate a message.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social media</kwd>
        <kwd>demographics</kwd>
        <kwd>classification</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>There is a huge amount of knowledge waiting to be extracted in health-related online social networks and forums, which we collectively refer to as social media. Health-related social media store the interactions of users who are interested in health-related topics [<xref ref-type="bibr" rid="ref1">1</xref>]. These users share their experiences, share information about friends and family, or seek help for a wide range of health issues [<xref ref-type="bibr" rid="ref1">1</xref>]. In the United States, more than 60 million Americans have read or collaborated in <italic>health 2.0</italic> resources [<xref ref-type="bibr" rid="ref2">2</xref>]. In addition, 40% of Americans have doubted a professional opinion when it conflicted with the opinions expressed in health-related social media [<xref ref-type="bibr" rid="ref2">2</xref>]. Health-related social media widen access to health information for the public, regardless of individuals’ race, age, locality, or education [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
        <p>In this study, we evaluated the content of posts in various health-related social media. We analyzed two types of health-related social media: (1) health-related online forums: WebMD and DailyStrength and (2) general social networks: Google+ and Twitter. This was a 4-step process comprising data collection, identifying post content categories, performing classification experiments, and performing a demographics analysis. We first collected large datasets of posts from each source. Afterward, we identified several meaningful post content categories from randomly selected posts from each source. In our classification experiments, we labeled data from each source and trained classifiers to identify post content categories. Finally, we used classifiers trained on our labeled data to identify categories in the remaining data and analyzed how often posts in these categories are made by various demographic groups.</p>
        <p>The goal of this study was to provide researchers with information and tools to support further research. For example, researchers looking for clinical trial participants can use DailyStrength, where users often share experiences about a particular condition, and health advocates seeking to spread awareness about a condition that affects men can use WebMD, where men often ask for advice. To this end, we also made comparisons between platforms to suggest where such a researcher might begin looking. The classifier models built in this study can assist with this task as well as other analyses involving health-related online postings.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <sec>
          <title>Analysis of Health-Related Social Media</title>
          <p>Many studies have been performed to characterize health-related social media communities. Hackworth and Kunz [<xref ref-type="bibr" rid="ref3">3</xref>] reported that 80% of Americans have searched the internet for health-related information, more than 60 million Americans are consumers of social networks in the Web 2.0 environment (health 2.0), and consumers, especially those with chronic conditions, are leading the health 2.0 movement by seeking clinical knowledge and emotional support. Wiley et al [<xref ref-type="bibr" rid="ref4">4</xref>] studied the impact of different characteristics of various social media forums on drug-related content and demonstrated that the characteristics of a social media platform affect several aspects of discussion. Eichstaedt et al [<xref ref-type="bibr" rid="ref5">5</xref>] predicted the county-level heart disease mortality by capturing the psychological characteristics of local communities through expressed text in Twitter. However, these studies do not describe or compare specific demographics in terms of their post content.</p>
          <p>Further work has focused on categorizing health-related posts based on their content. Yu et al [<xref ref-type="bibr" rid="ref6">6</xref>] performed a preliminary content analysis of a D/deaf and hard of hearing discussion forum, AllDeaf, to observe different types of social support behaviors and identify social support features for a future text classification task. Reavley and Pilkington [<xref ref-type="bibr" rid="ref7">7</xref>] analyzed the content of tweets related to depression and schizophrenia, finding that tweets about depression mostly discussed consumer resources and advertisements, whereas tweets about schizophrenia mostly raised awareness and reported research findings. Lee et al [<xref ref-type="bibr" rid="ref8">8</xref>] analyzed the content of tweets from health-related Twitter users, finding that they tweet about testable claims and personal experiences. Lopes and Da Silva [<xref ref-type="bibr" rid="ref9">9</xref>] collected posts from a health-related online forum, MedHelp, and used them to propose and refine a scheme for manually classifying health-related forum posts into 4 categories and a total of 23 subcategories. Our work was built upon these studies by defining our own categories of post content, some of which have analogues in these studies.</p>
        </sec>
        <sec>
          <title>Health-Related Demographic Analysis</title>
          <p>Other work has compared health issues between demographics or examined the demographics within a population participating in health-related research. Krueger et al [<xref ref-type="bibr" rid="ref10">10</xref>] studied the mortality attributable to a low education level in the United States across several demographics, where they found people with an education level below a high school degree to have a higher mortality rate. Anderson-Bill et al [<xref ref-type="bibr" rid="ref11">11</xref>] examined the demographics and behavioral and psychosocial characteristics of <italic>Web-health users</italic> (adults who use the Web to find information on health behavior and behavior change) recruited for a Web-based nutrition, physical activity, and weight gain prevention intervention. Their results suggest that users participating in online health interventions are likely “middle-aged, well-educated, upper middle-class women whose detrimental health behaviors put them at risk of obesity, heart disease, some cancers, and diabetes” [<xref ref-type="bibr" rid="ref11">11</xref>]. These studies describe the demographics of the populations in their studies but do not describe the demographics of health-related social media users.</p>
          <p>Previous work has focused on characterizing demographics on health-related social media. Sadah et al [<xref ref-type="bibr" rid="ref12">12</xref>] analyzed the demographics of health-related social media and found that users of drug review websites and health-related online forums are predominantly women, health-related social media users are generally older than general social media users, black users are underrepresented in health-related social media, users in areas with better access to health care participate more in health-related social media, and the writing level of health-related social media users is lower than the reading level of the general population. Sadah et al [<xref ref-type="bibr" rid="ref13">13</xref>] also performed a demographic-based content analysis of health-related social media posts to extract top distinctive terms, top drugs and disorders, sentiment, and emotion, finding that the most popular topic varied by demographic, for example, pregnancy was popular with female users, whereas cardiac problems, HIV, and back pain were the most discussed topics by male users. They also found that users with a higher writing level were less likely to express anger in their posts. We expanded upon this work by characterizing and comparing the demographics of health-related social media websites in terms of the frequency of post content categories.</p>
        </sec>
      </sec>
      <sec>
        <title>Text Classification in Social Media</title>
        <p>Text classification is frequently employed by researchers to gain insights into social media users and trends, both in and out of health-related settings. Sadilek et al [<xref ref-type="bibr" rid="ref14">14</xref>] studied the spread of infectious diseases by analyzing Twitter data using a support vector machine (SVM) model. Huh et al [<xref ref-type="bibr" rid="ref15">15</xref>] developed a naïve Bayes model to help WebMD moderators find posts they would likely respond to. Nikfarjam et al [<xref ref-type="bibr" rid="ref16">16</xref>] proposed a machine learning–based tagger to extract adverse drug reactions from health-related social media. Mislove et al [<xref ref-type="bibr" rid="ref17">17</xref>] estimated the gender and ethnicity of Twitter users using the reported first name and last name. Sadah et al [<xref ref-type="bibr" rid="ref12">12</xref>] expanded upon the work of Mislove et al [<xref ref-type="bibr" rid="ref17">17</xref>] by considering screen names in estimating gender. In this study, we used text classification techniques to identify categories of post content in health-related social media and used the techniques proposed in the studies by Sadah et al [<xref ref-type="bibr" rid="ref12">12</xref>] and Mislove et al [<xref ref-type="bibr" rid="ref17">17</xref>] to study the frequency of these categories within several demographics.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Datasets</title>
        <p>For health-related online forums, we selected 2 different websites, WebMD and DailyStrength. The reason for selecting 2 health-related online forums is to cover the different types of health-related online forums that they each represent. Whereas WebMD consists of multiple health communities where people ask questions and get responses from the community members [<xref ref-type="bibr" rid="ref18">18</xref>], DailyStrength enables patients to exchange experiences and treatments, discuss daily struggles and successes, and receive emotional support [<xref ref-type="bibr" rid="ref19">19</xref>]. For each post collected from these websites, we extracted the URL, title, author’s username, post time, the body of the post, and the name of the message board. For each user of a collected post, we also collected the author’s age, friends, gender, and location, where applicable. As crawling of these sites was performed at different times, some of the data we have collected do not reflect the current availability of certain attributes because of website format changes, for example, age and gender are currently available from WebMD user profiles but were not available before. In this study, the selection of demographic attributes we used for a source is based on the availability reflected by the majority of posts collected from that source, for example, most of the WebMD posts in our data were collected before age and gender were available, thus we did not use these attributes for an analysis of WebMD user demographics. We restricted the posts used from these sources to the first post in each thread. In our analysis, we used the post body, post title, message board name, and username from WebMD and the post body, post title, message board name, and user’s gender, age, and location from DailyStrength.</p>
        <p>For general social networks, we chose Twitter and Google+ as they offer interfaces to easily collect their data (in contrast to Facebook). For each Twitter post, we collected the post content, post time, location, and the author’s username and location. For each Google+ post we collected the title, post time, update time, the post content, the location, and the author’s username, first and last names, age, gender, and location. As Twitter and Google+ are general social networks, we used 274 representative health-related keywords to filter them as follows: (1) Drugs: from the list of the most dispensed prescriptions on RxList [<xref ref-type="bibr" rid="ref20">20</xref>], we selected the 200 most popular drugs. By removing the variants of the same drug (eg, different milligram dosages), the final list of drugs contained 124 unique drug names. (2) Hashtags: 11 popular health-related Twitter hashtags, such as #BCSM (Breast Cancer and Social Media). (3) Disorders: 81 frequently discussed disorders, such as AIDS and asthma. (4) Pharmaceuticals: the names of the 12 largest pharmaceutical companies, such as Novartis. (5) Insurance: the names of the 44 biggest insurance companies, such as Aetna and Shield. (6) General health-related keywords “healthcare” and “health insurance.” To reach the final keyword counts for hashtags, disorders, pharmaceuticals, and insurance, we sampled each keyword from a larger list for each of these categories and kept keywords with a high ratio of health-related posts. In our analysis, we used the tweet body, user’s first and last name, and user’s location from Twitter and post body, post title, and user’s gender, age, first and last name, and location from Google+.</p>
        <p>To filter Twitter with the health-related keyword list and retrieve relevant tweets for TwitterHealth, we used the Twitter streaming application programming interface (API) [<xref ref-type="bibr" rid="ref21">21</xref>]. Similarly, we used the Google+ API [<xref ref-type="bibr" rid="ref22">22</xref>] to extract the relevant posts for Google+Health. For health-related online forums WebMD and DailyStrength, we built a crawler for each website in Java using jsoup [<xref ref-type="bibr" rid="ref23">23</xref>], a library to extract and parse HTML content. <xref ref-type="table" rid="table1">Table 1</xref> lists for each source the number of posts collected, the date ranges of collected posts, and whether the demographic attributes used in this study are present, and <xref ref-type="table" rid="table2">Table 2</xref> lists the distribution of demographics for each source across each demographic attribute. For all 4 of these sources, we did not specifically focus our search on English-language posts aside from using English drug names; however, the majority of posts collected from these sources were in the English language.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>List of all sources used with their number of posts, date range of posts, and the available demographic attributes.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="130"/>
            <col width="240"/>
            <col width="150"/>
            <col width="60"/>
            <col width="160"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Source</td>
                <td>Number of posts</td>
                <td>Date range</td>
                <td>Gender</td>
                <td>Age</td>
                <td>Ethnicity</td>
                <td>Location</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>TwitterHealth [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
                <td>11,637,888</td>
                <td>May 2, 2013 to November 11, 2013</td>
                <td>Gender classifier [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
                <td>No<sup>a</sup></td>
                <td>Ethnicity classifier [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
                <td>Yes<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Google+Health [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                <td>186,666</td>
                <td>August 24, 2009 to January 5, 2014</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>Ethnicity classifier [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>DailyStrength [<xref ref-type="bibr" rid="ref26">26</xref>]</td>
                <td>1,319,622</td>
                <td>June 21, 2006 to December 3, 2017</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>No</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>WebMD [<xref ref-type="bibr" rid="ref27">27</xref>]</td>
                <td>318,297</td>
                <td>December 24, 2006 to May 11, 2019</td>
                <td>Gender classifier [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>No</td>
                <td>No</td>
                <td>No</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>The demographic attribute is not provided by the source and no classifier is used because of low accuracy.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>The demographic attribute is provided by the source.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Demographics of users from each source.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="190"/>
            <col width="190"/>
            <col width="210"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Attribute and demographic</td>
                <td>TwitterHealth, %</td>
                <td>Google+Health, %</td>
                <td>DailyStrength, n (%)</td>
                <td>WebMD, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Gender</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>48.19<sup>a</sup></td>
                <td>64.64<sup>a</sup></td>
                <td>95,269 (17.26)<sup>b</sup></td>
                <td>6769 (32.41)<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>51.81<sup>a</sup></td>
                <td>35.36<sup>a</sup></td>
                <td>456,600 (82.74)<sup>b</sup></td>
                <td>14,117 (67.59)<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Age (years)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0-17</td>
                <td>N/A<sup>c</sup></td>
                <td>3.42<sup>a</sup></td>
                <td>6656 (1.33)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-34</td>
                <td>N/A</td>
                <td>53.21<sup>a</sup></td>
                <td>187,966 (37.55)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>35-44</td>
                <td>N/A</td>
                <td>21.89<sup>a</sup></td>
                <td>126,646 (25.30)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>45-64</td>
                <td>N/A</td>
                <td>19.02<sup>a</sup></td>
                <td>149,487 (29.86)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥65</td>
                <td>N/A</td>
                <td>2.46<sup>a</sup></td>
                <td>29,847 (5.96)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Ethnicity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td>3.24<sup>a</sup></td>
                <td>5.60<sup>a</sup></td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black</td>
                <td>0.30<sup>a</sup></td>
                <td>0.30<sup>a</sup></td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td>23.50<sup>a</sup></td>
                <td>17.40<sup>a</sup></td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td>73.00<sup>a</sup></td>
                <td>76.60<sup>a</sup></td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Region</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>165,531 (19.83)<sup>d</sup></td>
                <td>2598 (17.86)<sup>d</sup></td>
                <td>73,221 (19.58)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
                <td>174,620 (20.92)<sup>d</sup></td>
                <td>2393 (16.45)<sup>d</sup></td>
                <td>84,302 (22.55)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>313,350 (37.53)<sup>d</sup></td>
                <td>4863 (33.44)<sup>d</sup></td>
                <td>123,556 (33.05)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>181,400 (21.73)<sup>d</sup></td>
                <td>4690 (32.25)<sup>d</sup></td>
                <td>92,809 (24.82)<sup>b</sup></td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Based on Sadah et al [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Calculated with user data collected or estimated from this study.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>Calculated from user counts reported in the study by Sadah et al [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Identifying Post Contents</title>
        <p>From each source, we randomly selected 500 posts. We then manually identified the different categories of shared content for each type of health-related social media. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, we identified 9 different categories. The first 4 categories were identified for both types of health-related social media (hence, all 4 sources). Of these first 4 categories, 3 were also identified by Lopes and Da Silva [<xref ref-type="bibr" rid="ref9">9</xref>], for example, <italic>share experiences</italic>, which we defined as posts in which a user shared a personal experience related to a health-related topic. This is similar to their <italic>sharing personal experiences</italic> category, except that we did not restrict our definition to experiences shared in response to another post. <italic>About family</italic> has no equivalent in their scheme, but it can be covered by other categories that they have defined, for example, by asking a specific question about or expressing sadness over a family member’s illness. Our share experiences category was also similar to categories in other work, for example, the <italic>personal experience of mental illness</italic> category in the study by Reavley and Pilkington [<xref ref-type="bibr" rid="ref7">7</xref>], the <italic>personal</italic> category from Lee et al [<xref ref-type="bibr" rid="ref8">8</xref>], the <italic>personal event</italic> category from Robillard et al [<xref ref-type="bibr" rid="ref28">28</xref>], and the <italic>first-hand experience</italic> category from Alvaro et al [<xref ref-type="bibr" rid="ref29">29</xref>]. As Twitter and Google+ are more news-based social media, we identified 5 additional categories from these sources. <italic>Educational material</italic> can be considered equivalent to the <italic>teaching</italic> category defined by Lopes and Da Silva [<xref ref-type="bibr" rid="ref9">9</xref>]. 
Despite the differences between the categories we defined and those proposed by Lopes and Da Silva [<xref ref-type="bibr" rid="ref9">9</xref>], we believe that our categories are sufficient for a <italic>proof of concept</italic> for automatic post content category classification in the two types of health-related social media that we investigated. It should be noted that the identification of specific experiences is outside the scope of this study; the <italic>share experiences</italic> category is a catch-all for any experiences shared in a health-related post from any source.</p>
        <p>We asked 3 graduate students to label the selected data from WebMD, Twitter, and Google+; we used a majority vote as the final result for each of these sources. <xref ref-type="table" rid="table4">Table 4</xref> lists the intercoder agreement as given by a Krippendorff alpha for our labeled datasets from WebMD, Twitter, and Google+. The selected DailyStrength data were labeled by the labeler with the highest agreement with the majority averaged over each category from the other 3 sources (average alpha=.680). As shown in <xref ref-type="table" rid="table5">Table 5</xref>, the distribution of categories in each source is different, for example, the share experiences category is more common in health-related online forums (WebMD and DailyStrength).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>List of all identified categories for health-related online forums and general social networks.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="240"/>
            <col width="230"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Health-related online forums</td>
                <td>General social networks</td>
                <td>Example</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Share experiences</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“I could not work after Tylenol.”</p>
                    </list-item>
                    <list-item>
                      <p>“I have taken Lipitor every day.”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Ask for specific medical advice or information</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Is honey allowed for diabetics?”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Request or give psychological support</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“I hope your diabetes is under control.”</p>
                    </list-item>
                    <list-item>
                      <p>“We’re thinking of you.”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>About family (not about self)</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“My son is now nine months old and teething like crazy.”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Share news</td>
                <td>No</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Kaiser Permanente Invites Software Developers To Build Apps—Forbes. http://feedly.com/k/Zojwq”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Jokes</td>
                <td>No</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Got any jokes about Sodium Hypobromite? NaBro.”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Advertisement</td>
                <td>No</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Check out these two vitamins for one recipe! http://bit.ly/1471dbn”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Personal opinion</td>
                <td>No</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Main frustration of lupus is losing the ability to do things that used to be normal”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Educational material</td>
                <td>No</td>
                <td>Yes</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Side Effects of Alzheimer’s and Dementia Drugs http://bit.ly/cK7L1f”</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Intercoder agreement for our labeled datasets (Krippendorff alpha).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="520"/>
            <col width="120"/>
            <col width="170"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>WebMD</td>
                <td>TwitterHealth</td>
                <td>Google+Health</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Share experiences</td>
                <td>0.349</td>
                <td>0.446</td>
                <td>0.109</td>
              </tr>
              <tr valign="top">
                <td>Ask for specific medical advice or information</td>
                <td>0.768</td>
                <td>0.225</td>
                <td>0.108</td>
              </tr>
              <tr valign="top">
                <td>Request or give psychological support</td>
                <td>0.219</td>
                <td>0.090</td>
                <td>−0.007</td>
              </tr>
              <tr valign="top">
                <td>About family (not about self)</td>
                <td>0.736</td>
                <td>0.322</td>
                <td>−0.010</td>
              </tr>
              <tr valign="top">
                <td>Share news</td>
                <td>N/A<sup>a</sup></td>
                <td>0.083</td>
                <td>0.083</td>
              </tr>
              <tr valign="top">
                <td>Jokes</td>
                <td>N/A</td>
                <td>0.177</td>
                <td>0.029</td>
              </tr>
              <tr valign="top">
                <td>Advertisement</td>
                <td>N/A</td>
                <td>0.220</td>
                <td>0.107</td>
              </tr>
              <tr valign="top">
                <td>Personal opinion</td>
                <td>N/A</td>
                <td>0.103</td>
                <td>0.038</td>
              </tr>
              <tr valign="top">
                <td>Educational material</td>
                <td>N/A</td>
                <td>0.164</td>
                <td>0.091</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Percentages of categories in each source from the labeled data (N=500).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="130"/>
            <col width="150"/>
            <col width="160"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>WebMD, n (%)</td>
                <td>DailyStrength, n (%)</td>
                <td>TwitterHealth, n (%)</td>
                <td>Google+Health, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Share experiences</td>
                <td>236 (47.2)</td>
                <td>400 (80.0)</td>
                <td>74 (14.8)</td>
                <td>65 (13.0)</td>
              </tr>
              <tr valign="top">
                <td>Ask for specific medical advice or information</td>
                <td>270 (54.0)</td>
                <td>173 (34.6)</td>
                <td>3 (0.6)</td>
                <td>10 (2.0)</td>
              </tr>
              <tr valign="top">
                <td>Request or give psychological support</td>
                <td>126 (25.2)</td>
                <td>247 (49.4)</td>
                <td>9 (1.8)</td>
                <td>7 (1.4)</td>
              </tr>
              <tr valign="top">
                <td>About family (not about self)</td>
                <td>68 (13.6)</td>
                <td>37 (7.4)</td>
                <td>5 (1.0)</td>
                <td>34 (6.8)</td>
              </tr>
              <tr valign="top">
                <td>Share news</td>
                <td>N/A<sup>a</sup></td>
                <td>N/A</td>
                <td>56 (11.2)</td>
                <td>145 (28.9)</td>
              </tr>
              <tr valign="top">
                <td>Jokes</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>38 (7.6)</td>
                <td>33 (6.6)</td>
              </tr>
              <tr valign="top">
                <td>Advertisement</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>26 (5.2)</td>
                <td>70 (14.0)</td>
              </tr>
              <tr valign="top">
                <td>Personal opinion</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>35 (7.0)</td>
                <td>84 (16.8)</td>
              </tr>
              <tr valign="top">
                <td>Educational material</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>36 (7.2)</td>
                <td>137 (25.7)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Bot Filtering</title>
        <p>We examined the impact of automated accounts (ie, <italic>bots</italic>) on our study using OSoMe’s Botometer (formerly BotOrNot, Indiana University) [<xref ref-type="bibr" rid="ref30">30</xref>], a tool that estimates how likely a Twitter account is to be a bot. We used the Botometer API to score each account that had a tweet in our initial sample of 500. The API assigned each of the 345 accounts that were still active a score in the range 0 to 1, with higher scores corresponding to a higher likelihood of an automated account. We manually evaluated each account with a score above 0.5. With this threshold, which was chosen because it is a natural choice that avoids possible bias from a more arbitrary choice of threshold value, we found a total of 33 likely bot accounts. We found that tweets from these accounts made up a substantial portion of the categories share news (11 tweets), advertisement (12 tweets), and educational material (10 tweets). As Botometer’s API rate limit makes removing all bot tweets from our Twitter corpus of over 11 million tweets unfeasible, we instead randomly selected 1000 posts from each day in the date range of our Twitter data. For each author of these selected posts, we again used Botometer to evaluate the likelihood of an automated account and removed tweets from accounts with a score above 0.5, leaving a total of 142,411 tweets used in our analysis.</p>
        <p>We also manually examined 100 posts each from WebMD and DailyStrength to determine the prevalence of bots on these websites; one of the authors read each of these posts and determined whether or not it appeared to have been posted by a spambot. In the context of online forums, a spambot is an automated agent that posts promotional content [<xref ref-type="bibr" rid="ref31">31</xref>]. By this criterion, none of the posts examined appeared to have been posted by a bot. Although this does not guarantee that there are no posts from bots in the data from these websites used in our study, it does suggest that posts from bots may be much less prevalent in these sources, likely because of the smaller volume of posts and more active moderation compared with Twitter and Google+.</p>
      </sec>
      <sec>
        <title>Building Post Content Classifiers</title>
        <p>For each category, we performed binary classification experiments with three classifier algorithms: random forest [<xref ref-type="bibr" rid="ref32">32</xref>], linear SVM [<xref ref-type="bibr" rid="ref33">33</xref>], and convolutional neural network (CNN) [<xref ref-type="bibr" rid="ref34">34</xref>]. We first extracted and concatenated the features shown in <xref ref-type="table" rid="table6">Table 6</xref>. These features include the title of a post, the main text of a post (body), and the name of the message board that contains the post (board name). For the random forest and SVM classifiers, we converted the features to a term frequency-inverse document frequency vector with stop words removed and the remaining words lemmatized. For the CNN classifier, we converted the features to sets of fastText [<xref ref-type="bibr" rid="ref35">35</xref>] vectors pretrained on Wikipedia. For all classifiers, we applied class weights to the training data such that the weight of the positive class (the post is in the category) is balanced with the weight of the negative class (the post is not in the category). These weights are used with random forest and SVM according to their implementations by Pedregosa et al [<xref ref-type="bibr" rid="ref36">36</xref>], whereas CNN uses oversampling of the least frequent class as recommended by Buda et al [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>To build the classifiers, we excluded the categories where the percentage is less than 10.0% (50/500), and for the rest, we first split the labeled data into two datasets as follows: (1) a training dataset (450 posts) and (2) a test dataset (50 posts), held out for a final test after training is complete. Afterward, for each classifier algorithm, we trained each classifier by varying the hyperparameters shown in <xref ref-type="table" rid="table7">Table 7</xref>, considering each combination of hyperparameter values. For all combinations, we performed a 5-fold cross-validation on the training dataset to select the combination of hyperparameter values with the highest balanced accuracy [<xref ref-type="bibr" rid="ref38">38</xref>]. Finally, we used these hyperparameter values to create a model trained on the full training dataset and tested this model on the test dataset that was held out before the cross-validation experiments. Note that we did not use a nested cross-validation, as our goal in these experiments was to find a single combination of hyperparameter values that we could use to apply a sufficiently accurate classifier model to the rest of our data.</p>
        <p><xref ref-type="table" rid="table8">Table 8</xref> shows the classifiers’ accuracy for WebMD, DailyStrength, Twitter, and Google+. We have shown only the classifiers for categories that have more than 10% of labeled data.</p>
        <p>For the remainder of our analysis, we only considered source-category combinations with a classifier that achieved a balanced accuracy of at least 0.75.</p>
        <p>For the source-category combinations that did not have a classifier that achieved a balanced accuracy of at least 0.75, we performed another round of experiments in which we attempted to classify posts using the best-performing classifier trained on a corresponding category from another source, for example, random forest for share experiences from WebMD. In these experiments, we used 500 posts from one source for training and 500 posts from another source for testing, again finding the best combination of hyperparameters via a 5-fold cross-validation of the training data. <xref ref-type="table" rid="table9">Table 9</xref> shows the results of these experiments. Classifiers trained on the DailyStrength and Twitter data achieved a balanced accuracy of over 0.75 on the share experiences category from Google+, so we added this category to the set of categories considered for further analysis. For each category in this set, we used the model with the highest balanced accuracy for that category to label the rest of the data. We report our findings on the frequency of these categories by several demographics according to their respective classifiers in the Results section.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>All classifiers’ training features.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Source</td>
                <td>Extracted features</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>WebMD</td>
                <td>Title, body, and board name</td>
              </tr>
              <tr valign="top">
                <td>DailyStrength</td>
                <td>Title, body, and board name</td>
              </tr>
              <tr valign="top">
                <td>Google+</td>
                <td>Title and body</td>
              </tr>
              <tr valign="top">
                <td>Twitter</td>
                <td>Body</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Classifier hyperparameter values evaluated in our experiments.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Classifier and hyperparameter</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Random forest</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum tree depth</td>
                <td>2, 4, 8, 16, 32, 64</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of trees, n</td>
                <td>10, 100, 1000</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Support vector machine</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td>0.001, 0.01, 0.1, 1, 10</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Loss function</td>
                <td>Hinge, squared hinge</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Convolutional neural network</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Filter window sizes</td>
                <td>(2, 3, 4), (3, 4, 5), (4, 5, 6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Feature maps per filter window size, n</td>
                <td>100, 200, 300, 400, 500, 600</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Classifier results for each category (N=50).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="130"/>
            <col width="140"/>
            <col width="130"/>
            <col width="140"/>
            <col width="130"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Source and category</td>
                <td colspan="2">Random forest</td>
                <td colspan="2">Support vector machine</td>
                <td colspan="2">Convolutional neural network</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Accuracy, n (%)</td>
                <td>Balanced accuracy</td>
                <td>Accuracy, n (%)</td>
                <td>Balanced accuracy</td>
                <td>Accuracy, n (%)</td>
                <td>Balanced accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>WebMD</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Share experiences<sup>a</sup></td>
                <td>41 (82)</td>
                <td>0.83<sup>b</sup></td>
                <td>41 (82)</td>
                <td>0.81</td>
                <td>41 (82)</td>
                <td>0.82</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ask for specific medical advice or information<sup>a</sup></td>
                <td>40 (80)</td>
                <td>0.82</td>
                <td>41 (82)</td>
                <td>0.83<sup>b</sup></td>
                <td>37 (74)</td>
                <td>0.76</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Request or give psychological support<sup>a</sup></td>
                <td>39 (78)</td>
                <td>0.71</td>
                <td>43 (86)</td>
                <td>0.80<sup>b</sup></td>
                <td>38 (76)</td>
                <td>0.68</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>About family (not about self)<sup>a</sup></td>
                <td>38 (76)</td>
                <td>0.56</td>
                <td>40 (80)</td>
                <td>0.89<sup>b</sup></td>
                <td>47 (94)</td>
                <td>0.81</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>DailyStrength</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Share experiences<sup>a</sup></td>
                <td>41 (82)</td>
                <td>0.80</td>
                <td>40 (80)</td>
                <td>0.70</td>
                <td>41 (82)</td>
                <td>0.82<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ask for specific medical advice or information<sup>a</sup></td>
                <td>39 (78)</td>
                <td>0.71</td>
                <td>38 (76)</td>
                <td>0.70</td>
                <td>37 (74)</td>
                <td>0.7 <sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Request or give psychological support</td>
                <td>34 (68)</td>
                <td>0.68</td>
                <td>33 (66)</td>
                <td>0.65</td>
                <td>38 (76)</td>
                <td>0.68<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>TwitterHealth</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Share experiences<sup>a</sup></td>
                <td>39 (78)</td>
                <td>0.77</td>
                <td>41 (82)</td>
                <td>0.82<sup>b</sup></td>
                <td>43 (86)</td>
                <td>0.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Share news<sup>a</sup></td>
                <td>41 (82)</td>
                <td>0.64</td>
                <td>40 (80)</td>
                <td>0.73</td>
                <td>47 (94)</td>
                <td>0.81<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Google+Health</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Share experiences</td>
                <td>44 (88)</td>
                <td>0.48</td>
                <td>35 (70)</td>
                <td>0.72<sup>b</sup></td>
                <td>45 (90)</td>
                <td>0.60</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Share news</td>
                <td>26 (52)</td>
                <td>0.48</td>
                <td>28 (56)</td>
                <td>0.52</td>
                <td>33 (66)</td>
                <td>0.59<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Advertisement</td>
                <td>38 (76)</td>
                <td>0.59</td>
                <td>24 (48)</td>
                <td>0.53</td>
                <td>42 (84)</td>
                <td>0.60<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Personal opinion</td>
                <td>39 (78)</td>
                <td>0.48</td>
                <td>37 (74)</td>
                <td>0.71<sup>b</sup></td>
                <td>42 (84)</td>
                <td>0.60</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Educational material<sup>a</sup></td>
                <td>40 (80)</td>
                <td>0.66</td>
                <td>34 (68)</td>
                <td>0.76</td>
                <td>41 (82)</td>
                <td>0.79<sup>b</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table8fn1">
              <p><sup>a</sup>The category of each source-category combination with at least one classifier that achieved a balanced accuracy of at least 0.75.</p>
            </fn>
            <fn id="table8fn2">
              <p><sup>b</sup>The highest balanced accuracy for each source-category combination.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Results of classifiers trained on a corresponding category from another source (N=500).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="170"/>
            <col width="200"/>
            <col width="170"/>
            <col width="130"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td>Training source</td>
                <td>Test source</td>
                <td>Category</td>
                <td>Classifier</td>
                <td>Accuracy, n (%)</td>
                <td>Balanced accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>WebMD</td>
                <td>DailyStrength</td>
                <td>Psychological support</td>
                <td>SVM<sup>a</sup></td>
                <td>328 (65.6)</td>
                <td>0.656</td>
              </tr>
              <tr valign="top">
                <td>WebMD</td>
                <td>Google+Health</td>
                <td>Share experiences</td>
                <td>Random forest</td>
                <td>428 (85.6)</td>
                <td>0.584</td>
              </tr>
              <tr valign="top">
                <td>DailyStrength</td>
                <td>
                  <italic>Google+Health</italic>
                  <sup>b</sup>
                </td>
                <td>
                  <italic>Share experiences</italic>
                </td>
                <td>CNN<sup>c</sup></td>
                <td>383 (76.6)</td>
                <td>
                  <italic>0.800</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>Twitter</td>
                <td>
                  <italic>Google+Health</italic>
                </td>
                <td>
                  <italic>Share experiences</italic>
                </td>
                <td>SVM</td>
                <td>408 (81.6)</td>
                <td>
                  <italic>0.770</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>Twitter</td>
                <td>Google+Health</td>
                <td>Share news</td>
                <td>CNN</td>
                <td>360 (72.0)</td>
                <td>0.562</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table9fn2">
              <p><sup>b</sup>The test source, category, and balanced accuracy of each classifier that achieved a balanced accuracy of at least 0.75 are italicized for emphasis.</p>
            </fn>
            <fn id="table9fn3">
              <p><sup>c</sup>CNN: convolutional neural network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Demographic Analysis</title>
        <p>We chose four demographic attributes as shown in <xref ref-type="table" rid="table1">Table 1</xref>: gender, age, ethnicity, and location. Where possible, we extracted these attributes from user profiles. These attributes are not available for every source, so we used existing classifier models where available to estimate their values. Specifically, we used the classifiers from Mislove et al [<xref ref-type="bibr" rid="ref17">17</xref>] to estimate gender for Twitter users and ethnicity for both Twitter and Google+ users. To estimate gender for WebMD users, we used the classifier from Sadah et al [<xref ref-type="bibr" rid="ref12">12</xref>], an extension of the classifier by Mislove et al that considers a user’s screen name when the user’s first name is not present. These classifiers use the 1000 most popular male and female birth names reported by the US Social Security Administration for each year from 1935 to 1995 as ground truth for gender and the distribution of ethnicities for each last name as reported by the 2000 US Census as ground truth for ethnicity. For each of these attributes, we used the data labeled by our post content category classifiers to determine how frequently users of each demographic write a post with one of these categories, for example, the percentage of posts made by male users in which a user shared his experiences. When comparing these percentages, we calculated statistical significance via a Pearson chi-square test. Note that a post can be in more than one category, for example, a post can both share experiences and ask for medical advice.</p>
      </sec>
      <sec>
        <title>Top Distinctive Message Boards</title>
        <p>For each combination of demographic and category (eg, male and share experiences) analyzed in WebMD and DailyStrength, we found the most distinctive message boards for that combination. For WebMD, we considered only boards that have at least 0.01% of posts for a given combination, or 30 if 0.01% is less than 30. Owing to the large number of message boards on DailyStrength (1608 analyzed in this study), we reduced this restriction to only consider boards with at least 30 posts for a given combination. We then determined distinctiveness by calculating the relative difference of each board. On the basis of the calculation for top distinctive terms by Sadah et al [<xref ref-type="bibr" rid="ref13">13</xref>], we calculated the relative difference of board <italic>b</italic> within the combination of category <italic>c</italic> and demographic <italic>d</italic> of demographic attribute <italic>a</italic> as shown in equation (1):</p>
        <disp-quote>
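          <p><italic>RelDif<sub>cd</sub>(b)</italic> = [<italic>Freq<sub>cd</sub>(b)</italic> − <italic>AvgFreq<sub>ca</sub>(b)</italic>] / <italic>AvgFreq<sub>ca</sub>(b)</italic> (1),</p>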
        </disp-quote>
        <p>where <italic>Freq<sub>cd</sub>(b)</italic> is the normalized frequency of posts on board <italic>b</italic> in category <italic>c</italic> by a user in demographic <italic>d</italic>, for example, the number of posts on the WebMD Breast Cancer message board that share experiences and were written by a female user divided by the number of posts on WebMD that share experiences and were written by a female user. <italic>AvgFreq<sub>ca</sub>(b)</italic> is the average <italic>Freq<sub>cd</sub>(b)</italic> across all demographics <italic>d</italic> within the demographic attribute <italic>a</italic>, for example, male and female for the demographic attribute, gender.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Demographics</title>
        <p>In this section, we present the categories’ results by each demographic where possible. For age demographics, we organized users into five groups: 0 to 17 years, 18 to 34 years, 35 to 44 years, 45 to 64 years, and 65 years and older. For ethnicity, we considered four possibilities: Asian, black, Hispanic, and white. For location, we considered the four regions designated by the US Census Bureau: Midwest, Northeast, South, and West. As explained in the Methods section, we considered the following categories for each source: (1) WebMD: share experiences, ask for advice, psychological support, and about family; (2) DailyStrength: share experiences and ask for advice; (3) TwitterHealth: share experiences and share news; and (4) Google+Health: share experiences and educational material.</p>
      </sec>
      <sec>
        <title>WebMD</title>
        <p>As shown in <xref ref-type="table" rid="table1">Table 1</xref>, our WebMD dataset includes gender predicted by the gender classifier from Sadah et al [<xref ref-type="bibr" rid="ref12">12</xref>]. Therefore, we have reported the distribution of gender among its categories. <xref ref-type="table" rid="table10">Table 10</xref> shows the frequency of posts made by male and female users for each category. We found that 70.04% (4741/6769) of posts written by male WebMD users asked for advice, compared with 45.14% (6372/14,117) of posts by female users (<italic>P</italic>&#60;.001). <xref ref-type="table" rid="table11">Table 11</xref> shows the top 10 most distinctive WebMD message boards by the number of posts for each combination of gender and category. Unsurprisingly, these results show that female users were more likely to post on boards about pregnancy and parenting than males in all categories, whereas male users were more likely to discuss men’s health issues. Men also gave psychological support and discussed family members on the message board for the infertility drug, Clomid, more frequently than women.</p>
        <table-wrap position="float" id="table10">
          <label>Table 10</label>
          <caption>
            <p>WebMD category frequency by gender.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="510"/>
            <col width="250"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td colspan="2">Gender, n (%)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male (n=6769)</td>
                <td>Female (n=14,117)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Share experiences</td>
                <td>3290 (48.60)</td>
                <td>4835 (34.25)</td>
              </tr>
              <tr valign="top">
                <td>Ask for advice</td>
                <td>4741 (70.04)</td>
                <td>6372 (45.14)</td>
              </tr>
              <tr valign="top">
                <td>Psychological support</td>
                <td>1914 (28.28)</td>
                <td>5515 (39.07)</td>
              </tr>
              <tr valign="top">
                <td>About family</td>
                <td>1986 (29.34)</td>
                <td>3623 (25.66)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table11">
          <label>Table 11</label>
          <caption>
            <p>Top 10 most distinctive WebMD message boards for male and female users in each category.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="70"/>
            <col width="230"/>
            <col width="230"/>
            <col width="240"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Gender</td>
                <td>Share experiences</td>
                <td>Ask for advice</td>
                <td>Psychological support</td>
                <td>About family</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Male</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Men’s Health</p>
                    </list-item>
                    <list-item>
                      <p>Erectile Dysfunction</p>
                    </list-item>
                    <list-item>
                      <p>Relationships and Coping</p>
                    </list-item>
                    <list-item>
                      <p>Cholesterol Management</p>
                    </list-item>
                    <list-item>
                      <p>Epilepsy</p>
                    </list-item>
                    <list-item>
                      <p>Depression</p>
                    </list-item>
                    <list-item>
                      <p>Allergies</p>
                    </list-item>
                    <list-item>
                      <p>Oral Health</p>
                    </list-item>
                    <list-item>
                      <p>Knee &#38; Hip Replacement</p>
                    </list-item>
                    <list-item>
                      <p>Ear, Nose &#38; Throat</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Erectile Dysfunction</p>
                    </list-item>
                    <list-item>
                      <p>Cholesterol Management</p>
                    </list-item>
                    <list-item>
                      <p>Men’s Health</p>
                    </list-item>
                    <list-item>
                      <p>HIV/AIDS</p>
                    </list-item>
                    <list-item>
                      <p>Depression</p>
                    </list-item>
                    <list-item>
                      <p>Epilepsy</p>
                    </list-item>
                    <list-item>
                      <p>Prostate Cancer</p>
                    </list-item>
                    <list-item>
                      <p>Sports Medicine</p>
                    </list-item>
                    <list-item>
                      <p>Pain Management</p>
                    </list-item>
                    <list-item>
                      <p>Ear, Nose &#38; Throat</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Relationships and Coping</p>
                    </list-item>
                    <list-item>
                      <p>Epilepsy</p>
                    </list-item>
                    <list-item>
                      <p>Depression</p>
                    </list-item>
                    <list-item>
                      <p>Back Pain</p>
                    </list-item>
                    <list-item>
                      <p>Heart Disease</p>
                    </list-item>
                    <list-item>
                      <p>Pain Management</p>
                    </list-item>
                    <list-item>
                      <p>Anxiety &#38; Panic</p>
                    </list-item>
                    <list-item>
                      <p>Clomid</p>
                    </list-item>
                    <list-item>
                      <p>Diabetes</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: 4 &#38; 5-Year-Olds</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Relationships and Coping</p>
                    </list-item>
                    <list-item>
                      <p>Depression</p>
                    </list-item>
                    <list-item>
                      <p>Erectile Dysfunction</p>
                    </list-item>
                    <list-item>
                      <p>Back Pain</p>
                    </list-item>
                    <list-item>
                      <p>Clomid</p>
                    </list-item>
                    <list-item>
                      <p>Epilepsy</p>
                    </list-item>
                    <list-item>
                      <p>Anxiety &#38; Panic</p>
                    </list-item>
                    <list-item>
                      <p>Pain Management</p>
                    </list-item>
                    <list-item>
                      <p>Sleep Disorders</p>
                    </list-item>
                    <list-item>
                      <p>Digestive Disorders</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Female</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Sexual Abuse Survivors Support</p>
                    </list-item>
                    <list-item>
                      <p>Trying to Conceive: 12 Months, Still Trying</p>
                    </list-item>
                    <list-item>
                      <p>Endometriosis</p>
                    </list-item>
                    <list-item>
                      <p>Breast Cancer</p>
                    </list-item>
                    <list-item>
                      <p>Infertility Treatment</p>
                    </list-item>
                    <list-item>
                      <p>Pregnancy: After Infertility</p>
                    </list-item>
                    <list-item>
                      <p>Pregnancy: After 35</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: Elementary Ages</p>
                    </list-item>
                    <list-item>
                      <p>Self-Harm</p>
                    </list-item>
                    <list-item>
                      <p>Menopause</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Trying to Conceive: 12 Months, Still Trying</p>
                    </list-item>
                    <list-item>
                      <p>Infertility Treatment</p>
                    </list-item>
                    <list-item>
                      <p>Dieting Club: 25-50 Lbs</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: Preteens &#38; Teenagers</p>
                    </list-item>
                    <list-item>
                      <p>Skin &#38; Beauty</p>
                    </list-item>
                    <list-item>
                      <p>Breast Cancer</p>
                    </list-item>
                    <list-item>
                      <p>Food &#38; Cooking</p>
                    </list-item>
                    <list-item>
                      <p>Lupus</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: 3-Year-Olds</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: 9-12 Months</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Chronic Fatigue Syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Lupus</p>
                    </list-item>
                    <list-item>
                      <p>Sexual Abuse Survivors Support</p>
                    </list-item>
                    <list-item>
                      <p>Breast Cancer</p>
                    </list-item>
                    <list-item>
                      <p>Endometriosis</p>
                    </list-item>
                    <list-item>
                      <p>Dieting Club: 10-25 Lbs</p>
                    </list-item>
                    <list-item>
                      <p>Trying to Conceive: 12 Months, Still Trying</p>
                    </list-item>
                    <list-item>
                      <p>Pregnancy: After 35</p>
                    </list-item>
                    <list-item>
                      <p>Dieting Club: 100+ Lbs</p>
                    </list-item>
                    <list-item>
                      <p>Pregnancy: After Infertility</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Sexual Abuse Survivors Support</p>
                    </list-item>
                    <list-item>
                      <p>Pregnancy: After 35</p>
                    </list-item>
                    <list-item>
                      <p>Trying to Conceive: 12 Months, Still Trying</p>
                    </list-item>
                    <list-item>
                      <p>Trying to Conceive: After Loss</p>
                    </list-item>
                    <list-item>
                      <p>Breast Cancer</p>
                    </list-item>
                    <list-item>
                      <p>Self-Harm</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: Preteens &#38; Teenagers</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: 9-12 Months</p>
                    </list-item>
                    <list-item>
                      <p>Dieting Club: 50-100 Lbs</p>
                    </list-item>
                    <list-item>
                      <p>Parenting: 6-9 Months</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>DailyStrength</title>
        <p>For our DailyStrength demographic attributes, gender, age, and location, we reported the results for the categories share experiences and ask for advice. <xref ref-type="table" rid="table12">Table 12</xref> shows the category frequencies for each demographic. The majority of posts (over 80%) from every demographic share experiences; but among the different age demographics, we saw a clear decline in frequency as age increases, from 92.77% (6175/6656) for users aged younger than 18 years to 81.82% (24,420/29,847) for users 65 years and older (<italic>P</italic>&#60;.001). The frequency of posts that ask for advice is similar for almost every demographic (30%-40%), with the exception of posts from users younger than 18 years, at 25.45% (1694/6656; <italic>P</italic>&#60;.001 for all comparisons between users younger than 18 years and other age groups).</p>
        <p><xref ref-type="table" rid="table13">Tables 13</xref>-<xref ref-type="table" rid="table15">15</xref> show the top 10 most distinctive DailyStrength message boards by the number of posts for each combination of gender and category, age group and category, and location and category, respectively. From these lists, we saw a wider variety of topics compared with WebMD, likely because of the large number of message boards on DailyStrength. However, we still saw some trends when considering broader topics. Male users tended to share experiences on message boards related to personal and social issues. Both male and female users asked for advice most frequently on boards related to physical conditions.</p>
        <p>We also observed a general tendency for younger users (aged younger than 45 years) to share experiences on message boards about personal and social issues, whereas older users favored message boards for general support and discussion. Users in all age groups frequently asked for advice about physical conditions. We found no clear trend in sharing experiences when evaluating census regions, but we saw that users from the Northeast region shared experiences about physical and psychological conditions, whereas users from the West region often shared experiences on message boards for general support and discussion. Users from all regions frequently asked for advice about physical conditions except the West, whose users tended to ask for advice on message boards for general support and discussion. Note that there are fewer than 10 message boards listed for users of age 0 to 17 years who asked for advice in <xref ref-type="table" rid="table14">Table 14</xref> because of the lack of message boards that also met our restriction of having at least 30 of these posts.</p>
        <table-wrap position="float" id="table12">
          <label>Table 12</label>
          <caption>
            <p>DailyStrength category frequency by gender, age, and location.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="290"/>
            <col width="220"/>
            <col width="230"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Attribute and demographic</td>
                <td>Total number of participants</td>
                <td>Share experiences, n (%)</td>
                <td>Ask for advice, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Gender</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>95,269</td>
                <td>78,760 (82.67)</td>
                <td>31,706 (33.28)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>456,600</td>
                <td>409,640 (89.72)</td>
                <td>167,867 (36.76)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Age group (years)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0-17</td>
                <td>6656</td>
                <td>6175 (92.77)</td>
                <td>1694 (25.45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-34</td>
                <td>187,966</td>
                <td>173,226 (92.16)</td>
                <td>65,191 (34.68)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>35-44</td>
                <td>126,646</td>
                <td>113,796 (89.85)</td>
                <td>48,335 (38.17)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>45-64</td>
                <td>149,487</td>
                <td>127,089 (85.02)</td>
                <td>54,008 (36.13)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥65</td>
                <td>29,847</td>
                <td>24,420 (81.82)</td>
                <td>10,581 (35.45)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Region</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>73,221</td>
                <td>65,761 (89.81)</td>
                <td>28,196 (38.51)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
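                <!-- NOTE(review): this total duplicates the South row and is inconsistent with the Midwest percentages in this row; 76,630 (90.90%) and 31,600 (37.48%) both imply a total of about 84,300. Verify against the source data before publication. -->
                <td>123,556</td>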
                <td>76,630 (90.90)</td>
                <td>31,600 (37.48)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>123,556</td>
                <td>110,597 (89.51)</td>
                <td>46,933 (37.99)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>92,809</td>
                <td>76,797 (82.75)</td>
                <td>31,481 (33.92)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table13">
          <label>Table 13</label>
          <caption>
            <p>Top 10 most distinctive DailyStrength message boards for male and female users in each category.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="90"/>
            <col width="400"/>
            <col width="510"/>
            <thead>
              <tr valign="top">
                <td>Gender</td>
                <td>Share experiences</td>
                <td>Ask for advice</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Male</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Vow To Live LGBT Against Suicide</p>
                    </list-item>
                    <list-item>
                      <p>Christian Church 24.7 Ministry</p>
                    </list-item>
                    <list-item>
                      <p>Gay Men’s Challenges</p>
                    </list-item>
                    <list-item>
                      <p>Single Dads</p>
                    </list-item>
                    <list-item>
                      <p>GOYA</p>
                    </list-item>
                    <list-item>
                      <p>Dealing with Diabetes2 and remembering Goldi</p>
                    </list-item>
                    <list-item>
                      <p>A Child Abuse Survivors Group</p>
                    </list-item>
                    <list-item>
                      <p>CALM and EASY GAMES</p>
                    </list-item>
                    <list-item>
                      <p>Financial Challenges</p>
                    </list-item>
                    <list-item>
                      <p>Liars Anonymous</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>A Laughter Club</p>
                    </list-item>
                    <list-item>
                      <p>Dealing with Diabetes2 and remembering Goldi</p>
                    </list-item>
                    <list-item>
                      <p>Impotence &#38; Erectile Dysfunction</p>
                    </list-item>
                    <list-item>
                      <p>Sex/Pornography Addiction</p>
                    </list-item>
                    <list-item>
                      <p>High Cholesterol</p>
                    </list-item>
                    <list-item>
                      <p>Tinnitus, Deafness and Ear Problems</p>
                    </list-item>
                    <list-item>
                      <p>Urinary Incontinence</p>
                    </list-item>
                    <list-item>
                      <p>Atrial Fibrillation (AFib)</p>
                    </list-item>
                    <list-item>
                      <p>MRSA</p>
                    </list-item>
                    <list-item>
                      <p>LDN .. Low Dose Naltrexone</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Female</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>helping with the housework</p>
                    </list-item>
                    <list-item>
                      <p>Lesbian Relationship Challenges</p>
                    </list-item>
                    <list-item>
                      <p>prompts</p>
                    </list-item>
                    <list-item>
                      <p>AlAnon One Day At A Time</p>
                    </list-item>
                    <list-item>
                      <p>Daughters of Abusive Mothers</p>
                    </list-item>
                    <list-item>
                      <p>Breastfeeding</p>
                    </list-item>
                    <list-item>
                      <p>Parenting Toddlers (1-3)</p>
                    </list-item>
                    <list-item>
                      <p>Post-Partum Depression</p>
                    </list-item>
                    <list-item>
                      <p>Infertility</p>
                    </list-item>
                    <list-item>
                      <p>Vulvar Cancer</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Pregnancy</p>
                    </list-item>
                    <list-item>
                      <p>Menopause</p>
                    </list-item>
                    <list-item>
                      <p>Trying To Conceive</p>
                    </list-item>
                    <list-item>
                      <p>Miscarriage</p>
                    </list-item>
                    <list-item>
                      <p>Polycystic Ovarian Syndrome (PCOS)</p>
                    </list-item>
                    <list-item>
                      <p>Family &#38; Friends of Bipolar</p>
                    </list-item>
                    <list-item>
                      <p>WHY WEIGHT? LET’S LOSE WEIGHT AND FEEL GREAT!</p>
                    </list-item>
                    <list-item>
                      <p>Infertility</p>
                    </list-item>
                    <list-item>
                      <p>Vulvar Cancer</p>
                    </list-item>
                    <list-item>
                      <p>Breastfeeding</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p/>
        <table-wrap position="float" id="table14">
          <label>Table 14</label>
          <caption>
            <p>Top 10 most distinctive DailyStrength message boards for each age group in each category.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="430"/>
            <col width="430"/>
            <thead>
              <tr valign="top">
                <td>Age group (years)</td>
                <td>Share experiences</td>
                <td>Ask for advice</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0-17</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Weight Loss For Teens</p>
                    </list-item>
                    <list-item>
                      <p>Gay &#38; Lesbian Teens</p>
                    </list-item>
                    <list-item>
                      <p>Depression–Teen</p>
                    </list-item>
                    <list-item>
                      <p>Bipolar Disorder–Teen</p>
                    </list-item>
                    <list-item>
                      <p>Self-Injury</p>
                    </list-item>
                    <list-item>
                      <p>Transgender</p>
                    </list-item>
                    <list-item>
                      <p>Depression</p>
                    </list-item>
                    <list-item>
                      <p>Coming Out</p>
                    </list-item>
                    <list-item>
                      <p>Bisexuality</p>
                    </list-item>
                    <list-item>
                      <p>Eating Disorders</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Weight Loss For Teens</p>
                    </list-item>
                    <list-item>
                      <p>Depression–Teen</p>
                    </list-item>
                    <list-item>
                      <p>Self-Injury</p>
                    </list-item>
                    <list-item>
                      <p>Eating Disorders</p>
                    </list-item>
                    <list-item>
                      <p>Anxiety</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>18-34</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Sunny and Peaceful Skies</p>
                    </list-item>
                    <list-item>
                      <p>Parenting Toddlers (1-3)</p>
                    </list-item>
                    <list-item>
                      <p>Daily Positive Thoughts</p>
                    </list-item>
                    <list-item>
                      <p>Trying To Conceive</p>
                    </list-item>
                    <list-item>
                      <p>Parenting Newborns &#38; Infants (0-1)</p>
                    </list-item>
                    <list-item>
                      <p>College Stress</p>
                    </list-item>
                    <list-item>
                      <p>Arnold-Chiari Malformation</p>
                    </list-item>
                    <list-item>
                      <p>ALL MOODY BLUES</p>
                    </list-item>
                    <list-item>
                      <p>Career Changes</p>
                    </list-item>
                    <list-item>
                      <p>Cerebral Palsy</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Trying To Conceive</p>
                    </list-item>
                    <list-item>
                      <p>Neuropathy</p>
                    </list-item>
                    <list-item>
                      <p>Pregnancy</p>
                    </list-item>
                    <list-item>
                      <p>Miscarriage</p>
                    </list-item>
                    <list-item>
                      <p>Polycystic Ovarian Syndrome (PCOS)</p>
                    </list-item>
                    <list-item>
                      <p>Cerebral Palsy</p>
                    </list-item>
                    <list-item>
                      <p>Endometriosis</p>
                    </list-item>
                    <list-item>
                      <p>Pseudotumor Cerebri</p>
                    </list-item>
                    <list-item>
                      <p>Sexually Transmitted Diseases–Female</p>
                    </list-item>
                    <list-item>
                      <p>Schizophrenia</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>35-44</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Vow To Live LGBT Against Suicide</p>
                    </list-item>
                    <list-item>
                      <p>Parenting 'Tweens (9-12)</p>
                    </list-item>
                    <list-item>
                      <p>Twins, Triplets &#38; More</p>
                    </list-item>
                    <list-item>
                      <p>Self-Hate Syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Parents Whose children have been sexually abused</p>
                    </list-item>
                    <list-item>
                      <p>HOPEFUL HEARTS...LIVING AGAIN AFTER THE LOSS</p>
                    </list-item>
                    <list-item>
                      <p>Neurofibromatosis</p>
                    </list-item>
                    <list-item>
                      <p>Breastfeeding</p>
                    </list-item>
                    <list-item>
                      <p>Hyperparathyroidism</p>
                    </list-item>
                    <list-item>
                      <p>Stillbirth</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>kindredspirits</p>
                    </list-item>
                    <list-item>
                      <p>Hyperparathyroidism</p>
                    </list-item>
                    <list-item>
                      <p>Multiple Sclerosis (MS)</p>
                    </list-item>
                    <list-item>
                      <p>Pseudotumor Cerebri</p>
                    </list-item>
                    <list-item>
                      <p>Allergies</p>
                    </list-item>
                    <list-item>
                      <p>Hemochromatosis</p>
                    </list-item>
                    <list-item>
                      <p>Hypothyroidism</p>
                    </list-item>
                    <list-item>
                      <p>Addison’s Disease</p>
                    </list-item>
                    <list-item>
                      <p>MCTD</p>
                    </list-item>
                    <list-item>
                      <p>Graves’ Disease</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>45-64</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>acoa sanctuary</p>
                    </list-item>
                    <list-item>
                      <p>prompts</p>
                    </list-item>
                    <list-item>
                      <p>Christians with MS</p>
                    </list-item>
                    <list-item>
                      <p>InHisCare Bible Study</p>
                    </list-item>
                    <list-item>
                      <p>The Serenity Room</p>
                    </list-item>
                    <list-item>
                      <p>Ticked off about Lyme</p>
                    </list-item>
                    <list-item>
                      <p>Biblical Studies and Archaeology</p>
                    </list-item>
                    <list-item>
                      <p>Alanon support group</p>
                    </list-item>
                    <list-item>
                      <p>Just support</p>
                    </list-item>
                    <list-item>
                      <p>WHY WEIGHT? LET’S LOSE WEIGHT AND FEEL GREAT!</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>WHY WEIGHT? LET’S LOSE WEIGHT AND FEEL GREAT!</p>
                    </list-item>
                    <list-item>
                      <p>MS People Dealing with MS Pain</p>
                    </list-item>
                    <list-item>
                      <p>Dealing with Diabetes2 and remembering Goldi</p>
                    </list-item>
                    <list-item>
                      <p>Multiple Myeloma</p>
                    </list-item>
                    <list-item>
                      <p>Menopause</p>
                    </list-item>
                    <list-item>
                      <p>High Cholesterol</p>
                    </list-item>
                    <list-item>
                      <p>LDN .. Low Dose Naltrexone</p>
                    </list-item>
                    <list-item>
                      <p>Myofascial Pain Syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Neurocardiogenic Syncope</p>
                    </list-item>
                    <list-item>
                      <p>Amputees</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>≥65</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Banana</p>
                    </list-item>
                    <list-item>
                      <p>A Little Bit Of Kindness Goes A long Way!</p>
                    </list-item>
                    <list-item>
                      <p>AlAnon One Day At A Time</p>
                    </list-item>
                    <list-item>
                      <p>VOICES OF RECOVERY</p>
                    </list-item>
                    <list-item>
                      <p>The Walking Group</p>
                    </list-item>
                    <list-item>
                      <p>The Front Porch</p>
                    </list-item>
                    <list-item>
                      <p>Over The Fence</p>
                    </list-item>
                    <list-item>
                      <p>Muscular Dystrophies</p>
                    </list-item>
                    <list-item>
                      <p>CALM and EASY GAMES</p>
                    </list-item>
                    <list-item>
                      <p>movie lovers</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>AlAnon One Day At A Time</p>
                    </list-item>
                    <list-item>
                      <p>VOICES OF RECOVERY</p>
                    </list-item>
                    <list-item>
                      <p>I can’t HEAR you!</p>
                    </list-item>
                    <list-item>
                      <p>COPD &#38; Emphysema</p>
                    </list-item>
                    <list-item>
                      <p>Meniere’s Disease</p>
                    </list-item>
                    <list-item>
                      <p>Parkinson’s Disease</p>
                    </list-item>
                    <list-item>
                      <p>Sleep Apnea</p>
                    </list-item>
                    <list-item>
                      <p>Interstitial Cystitis (IC)</p>
                    </list-item>
                    <list-item>
                      <p>Atrial Fibrillation (AFib)</p>
                    </list-item>
                    <list-item>
                      <p>Acromegaly</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table15">
          <label>Table 15</label>
          <caption>
            <p>Top 10 most distinctive DailyStrength message boards for each region in each category.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="440"/>
            <col width="440"/>
            <thead>
              <tr valign="top">
                <td>Region</td>
                <td>Share experiences</td>
                <td>Ask for advice</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Northeast</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>WHY WEIGHT? LET’S LOSE WEIGHT AND FEEL GREAT!</p>
                    </list-item>
                    <list-item>
                      <p>Self-Hate Syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Smoking Addiction &#38; Recovery</p>
                    </list-item>
                    <list-item>
                      <p>Urinary Incontinence</p>
                    </list-item>
                    <list-item>
                      <p>Families of Prisoners</p>
                    </list-item>
                    <list-item>
                      <p>Agoraphobia &#38; Social Anxiety</p>
                    </list-item>
                    <list-item>
                      <p>Cocaine Addiction &#38; Recovery</p>
                    </list-item>
                    <list-item>
                      <p>Obesity</p>
                    </list-item>
                    <list-item>
                      <p>CHRISTIAN PARENTS of ESTRANGED ADULT CHILDREN</p>
                    </list-item>
                    <list-item>
                      <p>Brain Injury</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>WHY WEIGHT? LET’S LOSE WEIGHT AND FEEL GREAT!</p>
                    </list-item>
                    <list-item>
                      <p>Obesity</p>
                    </list-item>
                    <list-item>
                      <p>Hidradenitis Suppurativa</p>
                    </list-item>
                    <list-item>
                      <p>Endometriosis</p>
                    </list-item>
                    <list-item>
                      <p>Deep Vein Thrombosis (DVT)</p>
                    </list-item>
                    <list-item>
                      <p>Atrial Fibrillation (AFib)</p>
                    </list-item>
                    <list-item>
                      <p>Diets &#38; Weight Maintenance</p>
                    </list-item>
                    <list-item>
                      <p>Gastritis</p>
                    </list-item>
                    <list-item>
                      <p>Polycystic Kidney Disease (PKD)</p>
                    </list-item>
                    <list-item>
                      <p>Hypothyroidism</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Midwest</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Just support</p>
                    </list-item>
                    <list-item>
                      <p>acoa sanctuary</p>
                    </list-item>
                    <list-item>
                      <p>helping with the housework</p>
                    </list-item>
                    <list-item>
                      <p>kindredspirits</p>
                    </list-item>
                    <list-item>
                      <p>The Coffee Shop</p>
                    </list-item>
                    <list-item>
                      <p>aa Spoken Here</p>
                    </list-item>
                    <list-item>
                      <p>Highly Sensitive People HSP</p>
                    </list-item>
                    <list-item>
                      <p>Financial Challenges</p>
                    </list-item>
                    <list-item>
                      <p>I can’t HEAR you!</p>
                    </list-item>
                    <list-item>
                      <p>Pseudotumor Cerebri</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>kindredspirits</p>
                    </list-item>
                    <list-item>
                      <p>Neurocardiogenic Syncope</p>
                    </list-item>
                    <list-item>
                      <p>Pseudotumor Cerebri</p>
                    </list-item>
                    <list-item>
                      <p>Gastritis</p>
                    </list-item>
                    <list-item>
                      <p>Irritable Bowel Syndrome (IBS)</p>
                    </list-item>
                    <list-item>
                      <p>COPD &#38; Emphysema</p>
                    </list-item>
                    <list-item>
                      <p>Parkinson’s Disease</p>
                    </list-item>
                    <list-item>
                      <p>Polycystic Kidney Disease (PKD)</p>
                    </list-item>
                    <list-item>
                      <p>Pancreatitis</p>
                    </list-item>
                    <list-item>
                      <p>Graves’ Disease</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>South</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>prompts</p>
                    </list-item>
                    <list-item>
                      <p>Beyond Medication</p>
                    </list-item>
                    <list-item>
                      <p>InHisCare Bible Study</p>
                    </list-item>
                    <list-item>
                      <p>Ticked off about Lyme</p>
                    </list-item>
                    <list-item>
                      <p>Muscular Dystrophies</p>
                    </list-item>
                    <list-item>
                      <p>aa friends</p>
                    </list-item>
                    <list-item>
                      <p>Anxiety and POSITIVE CHOICES</p>
                    </list-item>
                    <list-item>
                      <p>Games for Fun and Relaxation</p>
                    </list-item>
                    <list-item>
                      <p>MS People Dealing with MS Pain</p>
                    </list-item>
                    <list-item>
                      <p>Parents Whose children have been sexually abused</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>MS People Dealing with MS Pain</p>
                    </list-item>
                    <list-item>
                      <p>High Cholesterol</p>
                    </list-item>
                    <list-item>
                      <p>Cirrhosis</p>
                    </list-item>
                    <list-item>
                      <p>Polymyositis &#38; Dermatomyositis</p>
                    </list-item>
                    <list-item>
                      <p>Addison’s Disease</p>
                    </list-item>
                    <list-item>
                      <p>Meniere’s Disease</p>
                    </list-item>
                    <list-item>
                      <p>MCTD</p>
                    </list-item>
                    <list-item>
                      <p>Trying To Conceive</p>
                    </list-item>
                    <list-item>
                      <p>Endometriosis</p>
                    </list-item>
                    <list-item>
                      <p>Polycystic Ovarian Syndrome (PCOS)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>West</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>A Little Bit Of Kindness Goes A long Way!</p>
                    </list-item>
                    <list-item>
                      <p>The Walking Group</p>
                    </list-item>
                    <list-item>
                      <p>Alanon support group</p>
                    </list-item>
                    <list-item>
                      <p>VOICES OF RECOVERY</p>
                    </list-item>
                    <list-item>
                      <p>AlAnon One Day At A Time</p>
                    </list-item>
                    <list-item>
                      <p>BIBLICAL STUDIES</p>
                    </list-item>
                    <list-item>
                      <p>The Sunflower group</p>
                    </list-item>
                    <list-item>
                      <p>My Favorite Things.</p>
                    </list-item>
                    <list-item>
                      <p>FrIeNdShIpRoOm</p>
                    </list-item>
                    <list-item>
                      <p>three prayerpraise</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>AlAnon One Day At A Time</p>
                    </list-item>
                    <list-item>
                      <p>Banana</p>
                    </list-item>
                    <list-item>
                      <p>The Sunflower group</p>
                    </list-item>
                    <list-item>
                      <p>WINGS</p>
                    </list-item>
                    <list-item>
                      <p>VOICES OF RECOVERY</p>
                    </list-item>
                    <list-item>
                      <p>A Laughter Club</p>
                    </list-item>
                    <list-item>
                      <p>FrIeNdShIpRoOm</p>
                    </list-item>
                    <list-item>
                      <p>Myofascial Pain Syndrome</p>
                    </list-item>
                    <list-item>
                      <p>Hemochromatosis</p>
                    </list-item>
                    <list-item>
                      <p>Colon Cancer</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Twitter</title>
        <p>For our Twitter demographic attributes (gender, ethnicity, and location, with gender and ethnicity predicted by the classifier from Mislove et al [<xref ref-type="bibr" rid="ref17">17</xref>]), we reported the results for the categories share experiences and share news using our sample of 142,411 tweets in <xref ref-type="table" rid="table16">Table 16</xref>. As described in the Methods section, this dataset was created from our full corpus by first sampling 1000 posts for each day represented in the dataset and then pruning tweets from likely bot accounts. All demographics analyzed shared experiences more often than they shared news. Hispanic users had the largest difference, with 29.16% (826/2833) of them sharing experiences versus 5.47% (155/2833) sharing news (<italic>P</italic>&#60;.001). Users from the Northeast census region had the smallest difference, with 20.38% (1093/5362) of them sharing experiences versus 10.16% (545/5362) sharing news (<italic>P</italic>&#60;.001). Where comparison was possible between these demographics and their counterparts in WebMD and DailyStrength, we saw that Twitter users shared experiences less frequently (<italic>P</italic>&#60;.001 for all such comparisons).</p>
        <table-wrap position="float" id="table16">
          <label>Table 16</label>
          <caption>
            <p>Twitter category frequency by gender, ethnicity, and location.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="350"/>
            <col width="210"/>
            <col width="210"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Attribute and demographic</td>
                <td>Total number of participants</td>
                <td>Share experiences, n (%)</td>
                <td>Share news, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Gender</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>16,092</td>
                <td>3188 (19.81)</td>
                <td>1277 (7.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>17,850</td>
                <td>4835 (27.09)</td>
                <td>1091 (6.11)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Ethnicity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td>626</td>
                <td>166 (26.52)</td>
                <td>34 (5.43)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black</td>
                <td>56</td>
                <td>12 (21)</td>
                <td>3 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td>2833</td>
                <td>826 (29.16)</td>
                <td>155 (5.47)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td>9992</td>
                <td>2259 (22.61)</td>
                <td>728 (7.29)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Region</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>5362</td>
                <td>1093 (20.38)</td>
                <td>545 (10.16)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
                <td>4686</td>
                <td>1084 (23.13)</td>
                <td>380 (8.11)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>9855</td>
                <td>2162 (21.94)</td>
                <td>850 (8.63)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>5448</td>
                <td>1164 (21.37)</td>
                <td>515 (9.45)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>We also performed this analysis on our full Twitter dataset of 11,637,888 tweets. We compared these results with the results shown in <xref ref-type="table" rid="table16">Table 16</xref> and found that the differences were generally not statistically significant (with statistical significance defined as <italic>P</italic>&#60;.05) for the share experiences category but were significant for all but one demographic in the share news category. These findings agree with our evaluation of bot likelihood using our initial sample of 500 tweets, where we found that the share news category had a substantial number of tweets from likely bot accounts, but the share experiences category did not. The <italic>P</italic> values of these comparisons are shown in <xref ref-type="table" rid="table17">Table 17</xref>.</p>
        <table-wrap position="float" id="table17">
          <label>Table 17</label>
          <caption>
            <p><italic>P</italic> values of comparisons between Twitter results using pruned data and results using all data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="70"/>
            <col width="90"/>
            <col width="70"/>
            <col width="70"/>
            <col width="100"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Male</td>
                <td>Female</td>
                <td>Asian</td>
                <td>Black</td>
                <td>Hispanic</td>
                <td>White</td>
                <td>Northeast</td>
                <td>Midwest</td>
                <td>South</td>
                <td>West</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Share Experiences</td>
                <td>&#60;.001</td>
                <td>.47</td>
                <td>.24</td>
                <td>.80</td>
                <td>.68</td>
                <td>.15</td>
                <td>.13</td>
                <td>.048</td>
                <td>.002</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Share News</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
                <td>.23</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Google+</title>
        <p>Our Google+ demographic attributes include gender, age, ethnicity, and location, with ethnicity predicted by the classifier from Mislove et al [<xref ref-type="bibr" rid="ref17">17</xref>], and for these attributes we reported the results from the share experiences and educational material categories in <xref ref-type="table" rid="table18">Table 18</xref>. As classifiers trained on our labeled Google+ dataset did not achieve a sufficiently high balanced accuracy for the share experiences category, we considered classifiers trained on the labeled DailyStrength and Twitter data as described in the Methods section. Of the full set of Google+ posts, 34.13% (63,709/186,666) were classified as share experiences by the DailyStrength-trained classifier and 18.83% (35,149/186,666) by the Twitter-trained classifier. As the latter distribution of the share experiences category is closer to the distribution reported in <xref ref-type="table" rid="table5">Table 5</xref>, 13.0% (65/500), we used the Twitter-trained classifier for the remainder of our analysis in the share experiences category.</p>
        <p>From these results, we saw that most demographics appeared to share experiences more frequently than the set of all Google+ users. This is likely the effect of a bias toward users who chose to report these attributes (or a real name, in the case of ethnicity). When comparing how often a demographic shares experiences with how often posts from users with no data on that demographic’s corresponding attribute share experiences (eg, posts from men vs posts from users who did not report gender), we found that <italic>P</italic>&#60;.001 for all such comparisons except for users aged ≥65 years (<italic>P</italic>=.83). Where comparison is possible between these demographics and their counterparts in WebMD and DailyStrength, we saw that Google+ users shared experiences less frequently (<italic>P</italic>&#60;.001 for all such comparisons).</p>
        <p>Educational material was shared less frequently by users aged between 35 and 44 years, 14.9% (46/308), than by users of any other age group. In particular, they shared educational material much less frequently than both the previous age group, 18 to 34 years, 25.5% (141/552), <italic>P</italic>&#60;.001; and the following age group, 45 to 64 years, 34.3% (171/499), <italic>P</italic>&#60;.001. Asian Google+ users, 35.75% (1010/2825), shared substantially more educational material than users of any other ethnicity (<italic>P</italic>=.002 vs black users, <italic>P</italic>&#60;.001 vs Hispanic users, and <italic>P</italic>&#60;.001 vs white users).</p>
        <table-wrap position="float" id="table18">
          <label>Table 18</label>
          <caption>
            <p>Google+ category frequency by gender, age, ethnicity, and location.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="310"/>
            <col width="230"/>
            <col width="230"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Attribute and demographic</td>
                <td>Total number of participants</td>
                <td>Share experiences, n (%)</td>
                <td>Educational material, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Gender</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>61,479</td>
                <td>15,234 (24.78)</td>
                <td>16,200 (26.35)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>32,082</td>
                <td>9803 (30.56)</td>
                <td>8029 (25.03)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Age group (years)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0-17</td>
                <td>42</td>
                <td>19 (45.24)</td>
                <td>8 (19.05)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-34</td>
                <td>552</td>
                <td>189 (34.24)</td>
                <td>141 (25.54)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>35-44</td>
                <td>308</td>
                <td>101 (32.79)</td>
                <td>46 (14.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>45-64</td>
                <td>499</td>
                <td>62 (12.42)</td>
                <td>171 (34.27)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥65</td>
                <td>45</td>
                <td>9 (20.00)</td>
                <td>13 (28.89)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Ethnicity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td>2825</td>
                <td>730 (25.84)</td>
                <td>1010 (35.75)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black</td>
                <td>72</td>
                <td>28 (38.89)</td>
                <td>13 (18.06)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td>3389</td>
                <td>1137 (33.55)</td>
                <td>707 (20.86)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td>17,230</td>
                <td>5076 (29.46)</td>
                <td>3340 (19.38)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Region</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>4510</td>
                <td>1097 (24.32)</td>
                <td>957 (21.22)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
                <td>4210</td>
                <td>1310 (31.12)</td>
                <td>716 (17.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>9532</td>
                <td>2636 (27.65)</td>
                <td>1913 (20.07)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>7959</td>
                <td>2279 (28.63)</td>
                <td>1708 (21.46)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our analysis shows several interesting results. From our initial samples, we found that health-related posts from general social networks often shared news and educational material, and posts on health-related online forums frequently shared experiences, asked for medical advice, and requested or gave psychological support (<xref ref-type="table" rid="table5">Table 5</xref>). Our evaluation of three classification algorithms on the post content categories described by our study showed that, in terms of balanced accuracy, SVM tended to perform well on WebMD, whereas CNN performed better on DailyStrength data. Of the 2 Twitter categories used in our experiments, share experiences and share news, SVM performed the best in share experiences and CNN was the best in share news. None of the classifiers we evaluated performed particularly well when trained with the Google+ data; only the CNN classifier was able to meet our performance threshold in the Google+ educational material category. However, classifiers trained on the DailyStrength and Twitter data were able to meet our performance threshold in the Google+ share experiences category, suggesting that at least some transferability is possible with classifiers trained on other datasets.</p>
        <p>A further analysis of our health-related online forum data showed distinct differences between users of WebMD and DailyStrength. On WebMD, we found that the majority of posts made by male users and almost half of all posts made by female users asked for advice. This would seem to contradict an earlier study that found that women were the predominant users of the internet for health advice [<xref ref-type="bibr" rid="ref39">39</xref>], but when considering the overall number of posts from male and female WebMD users included in our study (41,422 posts by men vs 93,293 by women), we saw that posts asking for advice were still more likely to be written by a woman than a man. DailyStrength users shared experiences frequently in all demographics analyzed in our study, even more so than WebMD users; however, asking for advice was less common than on WebMD. These differences may be explained by the differences in the 2 health-related online forums; although DailyStrength offers support groups for a variety of topics, WebMD communities are often frequented by experts who can provide advice to users.</p>
        <p>An analysis of health-related posts on general social networks, Twitter and Google+, suggested that they differ from health-related online forums. Compared with WebMD and DailyStrength, sharing experiences, which identifies posts in which a user shared a personal experience related to a health-related topic, is far less frequent in posts from Twitter and Google+ that contain one or more of the health-related keywords used in this study. The relatively low frequency of sharing experiences in our sample of several health-related topics on general social networks compared with the frequency of sharing experiences on health-related online forums may be due to a variety of factors, such as Twitter’s lack of health-related communities because of its structure as well as WebMD’s and DailyStrength’s focus on answering medical questions and providing support, respectively. Some subsets of health-related tweets studied in other work have low proportions of sharing experiences similar to our observations, such as tweets about depression [<xref ref-type="bibr" rid="ref7">7</xref>], schizophrenia [<xref ref-type="bibr" rid="ref7">7</xref>], and dementia [<xref ref-type="bibr" rid="ref28">28</xref>], as well as tweets from health-related Twitter users [<xref ref-type="bibr" rid="ref8">8</xref>]. However, other work has shown that the proportion can be much higher, such as in tweets about dental pain [<xref ref-type="bibr" rid="ref40">40</xref>] and prescription drug use [<xref ref-type="bibr" rid="ref29">29</xref>]. Many health-related topics had high proportions of posts that shared experiences in our Google+ data, for example, <italic>headache</italic>, 93.22% (6572/7050); <italic>migraine</italic>, 78.77% (2029/2576); <italic>insomnia</italic>, 71.41% (2430/3403); <italic>cold sore</italic>, 58.0% (370/638); and <italic>diazepam</italic>, 51.1% (95/186). This suggests that the proportion of sharing experiences in health-related posts may be highly dependent on the topic or topics studied; thus, our findings on the share experiences category may not generalize to other studies on health-related social media posts.</p>
        <p>Our comparison of results between our stratified sample of Twitter data with tweets from suspected bots removed and our full Twitter dataset showed that automated accounts had a significant impact on the share news category. Other work has also shown that bots can have an effect on health-related Twitter conversations, particularly on the subject of vaccination. Bots post both pro- and antivaccine tweets [<xref ref-type="bibr" rid="ref41">41</xref>] and retweet vaccine-related tweets at higher frequencies than human users [<xref ref-type="bibr" rid="ref42">42</xref>]. The use of bots in this manner amplifies the debate and further polarizes the communities involved. It is clear that bot activity must be considered when analyzing health-related conversations on Twitter.</p>
        <p>The differences in how often educational material is shared on Google+ between the demographics we studied highlight potential targets for informational health care campaigns. A health care campaign is a broad health care–related activity that is driven, led, or coordinated at the national or subnational level [<xref ref-type="bibr" rid="ref43">43</xref>]. Users in the age demographic of 35 to 44 years, who share educational material less often than other age groups, may benefit from being provided with medical information that they are not aware of. Demographics that share educational material more frequently than others, such as Asian Google+ users, may also be of interest to medical experts. If a further analysis of the educational material shared by these groups shows that the information is inaccurate or misleading, providing correct information may benefit them.</p>
        <p>Our results provide useful information that can help health care providers to reach the right demographic group. For example, researchers looking for clinical trial participants can use health-related online forums, where many posts are about sharing experiences. Moreover, demographic-specific results can help guide targeted educational campaigns. As an example, male WebMD users ask specific medical advice questions more often than female users, so male WebMD users may be more receptive to a campaign offering advice from medical experts.</p>
        <p>The classifier models used in this study can also be useful for researchers who want to study posts that contain the categories we studied. For example, a researcher who wants to study experiences about a particular drug can use these classifiers to find posts that share experiences from a larger dataset of posts that mention that drug. As another example, a researcher who wants to find out which disorders are frequently mentioned among users who share news can use a classifier to gather a dataset of news-sharing posts. In general, we provided researchers with tools that enable them to test hypotheses and do research on the subject of health-related social media posts. These tools are provided by the description of our methodology, which describes how one might build these classifier models, and by trained classifier models that are available on request. Similar tools may also be applicable to the categories in the scheme proposed by Lopes and Da Silva [<xref ref-type="bibr" rid="ref9">9</xref>]. We leave this as future work.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>As users of health-related social media use an informal writing style, the 274 keywords we selected to filter Twitter and Google+, as described in the Methods section, may not cover all health-related posts or their variability in topics. For example, the abbreviation <italic>IUI</italic> (intrauterine insemination) is widely used in health-related posts but not included in the health-related keyword list. Another limitation is that the terms used to filter Twitter and Google+ can have multiple meanings. For example, the word “cancer” yields many tweets that talk about zodiac signs.</p>
        <p>We found that some Twitter categories have a high proportion of tweets from automated accounts. Although we have attempted to filter out tweets from such accounts, some such tweets may still exist in the data used in our analysis, and tweets from legitimate accounts may have been filtered out. Our initial evaluation of bot prevalence also found that the educational material category had a high proportion of tweets from bots. This may also be true of that category in the Google+ data, which was not filtered for bots; thus, those results may not accurately represent the demographics studied.</p>
        <p>Our demographic populations may not be fully representative of all users from the sources in our study. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, some of our demographics were estimated using classifiers, and these estimates are not always correct. Other demographics in our study are optionally reported by users. This introduces a bias toward users who choose to report their age, gender, and/or location, as noted in our results from Google+. We also assumed these reported demographics are correct for each such user.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we analyzed the content shared in two different types of health-related social media: health-related online forums and general social networks. For the two types of health-related social media, we manually identified 4 post categories: share experiences, ask for specific medical advice, request or give psychological support, and about family; and we additionally identified 5 categories for general social networks: share news, jokes, advertisements, personal opinion, and educational material. After labeling randomly selected data for each source, we built classifiers for each category. Finally, we made demographic-based content analyses where possible.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This project was partially supported by the National Science Foundation grant numbers IIS-1619463, IIS-1746031, IIS-1838222, and IIS-1901379. Any opinions, findings, and conclusions or recommendations expressed in this paper are those of the authors and do not necessarily reflect the views of the National Science Foundation.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>RR conducted the experiments and analysis and wrote the manuscript. SS conducted earlier versions of the experiments and analysis and assisted in the writing of the manuscript. YG coordinated the labeling of the training datasets and conducted preliminary research. VH conceived the study and provided coordination and guidance in the experiments and writing of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moorhead</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Hazlett</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Irwin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hoving</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A new dimension of health care: systematic review of the uses, benefits, and limitations of social media for health communication</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>04</month>
          <day>23</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>e85</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2013/4/e85/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1933</pub-id>
          <pub-id pub-id-type="medline">23615206</pub-id>
          <pub-id pub-id-type="pii">v15i4e85</pub-id>
          <pub-id pub-id-type="pmcid">PMC3636326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Fichman</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Gallaugher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Glaser</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Community relations 2.0</article-title>
          <source>Harv Bus Rev</source>
          <year>2009</year>
          <month>11</month>
          <volume>87</volume>
          <issue>11</issue>
          <fpage>45</fpage>
          <lpage>50, 132</lpage>
          <pub-id pub-id-type="medline">19891388</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hackworth</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Kunz</surname>
              <given-names>MB</given-names>
            </name>
          </person-group>
          <article-title>Health care and social media: building relationships via social networks</article-title>
          <source>Acad Health Care Manag J</source>
          <year>2011</year>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.abacademies.org/articles/volume-7-issue-2.pdf#page=7"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Esterling</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Pharmaceutical drugs chatter on Online Social Networks</article-title>
          <source>J Biomed Inform</source>
          <year>2014</year>
          <month>06</month>
          <volume>49</volume>
          <fpage>245</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00063-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.03.006</pub-id>
          <pub-id pub-id-type="medline">24637141</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00063-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Labarthe</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Jha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dziurzynski</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Sap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weeg</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Seligman</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Psychological language on Twitter predicts county-level heart disease mortality</article-title>
          <source>Psychol Sci</source>
          <year>2015</year>
          <month>02</month>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>159</fpage>
          <lpage>69</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25605707"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0956797614557867</pub-id>
          <pub-id pub-id-type="medline">25605707</pub-id>
          <pub-id pub-id-type="pii">0956797614557867</pub-id>
          <pub-id pub-id-type="pmcid">PMC4433545</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gerido</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Exploring text classification of social support in online health communities for people who are D/deaf and hard of hearing</article-title>
          <source>Proc Assoc Info Sci Tech</source>
          <year>2017</year>
          <volume>54</volume>
          <issue>1</issue>
          <fpage>840</fpage>
          <lpage>1</lpage>
          <pub-id pub-id-type="doi">10.1002/pra2.2017.14505401179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reavley</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pilkington</surname>
              <given-names>PD</given-names>
            </name>
          </person-group>
          <article-title>Use of Twitter to monitor attitudes toward depression and schizophrenia: an exploratory study</article-title>
          <source>PeerJ</source>
          <year>2014</year>
          <volume>2</volume>
          <fpage>e647</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.7717/peerj.647"/>
          </comment>
          <pub-id pub-id-type="doi">10.7717/peerj.647</pub-id>
          <pub-id pub-id-type="medline">25374786</pub-id>
          <pub-id pub-id-type="pii">647</pub-id>
          <pub-id pub-id-type="pmcid">PMC4217192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>DeCamp</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chisolm</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>ZD</given-names>
            </name>
          </person-group>
          <article-title>What are health-related users tweeting? A qualitative content analysis of health-related users and their messages on twitter</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>10</month>
          <day>15</day>
          <volume>16</volume>
          <issue>10</issue>
          <fpage>e237</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/10/e237/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3765</pub-id>
          <pub-id pub-id-type="medline">25591063</pub-id>
          <pub-id pub-id-type="pii">v16i10e237</pub-id>
          <pub-id pub-id-type="pmcid">PMC4296104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lopes</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>da Silva</surname>
              <given-names>BG</given-names>
            </name>
          </person-group>
          <article-title>A classification scheme for analyses of messages exchanged in online health forums</article-title>
          <source>Inf Res</source>
          <year>2019</year>
          <volume>24</volume>
          <issue>1</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://informationr.net/ir/24-1/isic2018/isic1827.html"/>
          </comment>
          <pub-id pub-id-type="pii">isic1827</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Hummer</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>VW</given-names>
            </name>
          </person-group>
          <article-title>Mortality Attributable to Low Levels of Education in the United States</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>7</issue>
          <fpage>e0131809</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0131809"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0131809</pub-id>
          <pub-id pub-id-type="medline">26153885</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-06467</pub-id>
          <pub-id pub-id-type="pmcid">PMC4496052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anderson-Bill</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Winett</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Wojcik</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Social cognitive determinants of nutrition and physical activity among web-health users enrolling in an online intervention: the influence of social support, self-efficacy, outcome expectations, and self-regulation</article-title>
          <source>J Med Internet Res</source>
          <year>2011</year>
          <month>03</month>
          <day>17</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>e28</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2011/1/e28/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1551</pub-id>
          <pub-id pub-id-type="medline">21441100</pub-id>
          <pub-id pub-id-type="pii">v13i1e28</pub-id>
          <pub-id pub-id-type="pmcid">PMC3221350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sadah</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Shahbazi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>A study of the demographics of Web-based health-related social media users</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>08</month>
          <day>6</day>
          <volume>17</volume>
          <issue>8</issue>
          <fpage>e194</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/8/e194/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4308</pub-id>
          <pub-id pub-id-type="medline">26250986</pub-id>
          <pub-id pub-id-type="pii">v17i8e194</pub-id>
          <pub-id pub-id-type="pmcid">PMC4705027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sadah</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Shahbazi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Demographic-based content analysis of web-based health-related social media</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>06</month>
          <day>13</day>
          <volume>18</volume>
          <issue>6</issue>
          <fpage>e148</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/6/e148/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5327</pub-id>
          <pub-id pub-id-type="medline">27296242</pub-id>
          <pub-id pub-id-type="pii">v18i6e148</pub-id>
          <pub-id pub-id-type="pmcid">PMC4923586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sadilek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kautz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Silenzio</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Modeling Spread of Disease From Social Interactions</article-title>
          <source>Proceedings of the Sixth International AAAI Conference on Weblogs and Social Media</source>
          <year>2012</year>
          <conf-name>ICWSM'12</conf-name>
          <conf-date>June 4-7, 2012</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/ICWSM/ICWSM12/paper/view/4493/4999"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yetisgen-Yildiz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Text classification for assisting moderators in online health communities</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>12</month>
          <volume>46</volume>
          <issue>6</issue>
          <fpage>998</fpage>
          <lpage>1005</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00139-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.08.011</pub-id>
          <pub-id pub-id-type="medline">24025513</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00139-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC3874858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>671</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25755127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id>
          <pub-id pub-id-type="medline">25755127</pub-id>
          <pub-id pub-id-type="pii">ocu041</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mislove</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Onnela</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenquist</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Understanding the Demographics of Twitter Users</article-title>
          <source>Proceedings of the 5th international AAAI conference on weblogs and social media</source>
          <year>2011</year>
          <conf-name>ICWSM'11</conf-name>
          <conf-date>July 17-21, 2011</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/ICWSM/ICWSM11/paper/view/2816/3234"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanthawala</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vermeesch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Given</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Huh</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Answers to health questions: internet search results versus online health community responses</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>04</month>
          <day>28</day>
          <volume>18</volume>
          <issue>4</issue>
          <fpage>e95</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/4/e95/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5369</pub-id>
          <pub-id pub-id-type="medline">27125622</pub-id>
          <pub-id pub-id-type="pii">v18i4e95</pub-id>
          <pub-id pub-id-type="pmcid">PMC4865652</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bissoyi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Patra</surname>
              <given-names>MR</given-names>
            </name>
          </person-group>
          <article-title>Recommender Systems in a Patient Centric Social Network - A Survey</article-title>
          <source>Proceedings of the 2016 International Conference on Signal Processing, Communication, Power and Embedded System</source>
          <year>2016</year>
          <conf-name>SCOPES'16</conf-name>
          <conf-date>October 3-5, 2016</conf-date>
          <conf-loc>Paralakhemundi, India</conf-loc>
          <fpage>386</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1109/SCOPES.2016.7955858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <source>RxList - The Internet Drug Index for Prescription Drug Information, Interactions, and Side Effects</source>
          <access-date>2015-02-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.rxlist.com/script/main/hp.asp">http://www.rxlist.com/script/main/hp.asp</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6W24Layfl"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <source>Twitter Developer</source>
          <access-date>2015-06-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/">https://developer.twitter.com/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6ZHpD7gjO"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <access-date>2015-02-02</access-date>
          <comment>Google+ API &#124; Google+ Platform for Web. &#124; Google Developers <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.webcitation.org/78S7b05G0">https://www.webcitation.org/78S7b05G0</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6W24x2YYc"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hedley</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>jsoup Java HTML Parser, with best of DOM, CSS, and jquery</source>
          <access-date>2015-02-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jsoup.org/">http://jsoup.org/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6W2528eFu"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <source>Twitter</source>
          <access-date>2016-05-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://twitter.com/">https://twitter.com/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6hd6ik8QF"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>Google Plus</source>
          <access-date>2016-05-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://plus.google.com/">https://plus.google.com/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6hd6ooiY6"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <access-date>2015-02-02</access-date>
          <comment>DailyStrength: Support Groups. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.dailystrength.org/support-groups">http://www.dailystrength.org/support-groups</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6W23ztpRe"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <source>WebMD - Better Information. Better Health</source>
          <access-date>2015-02-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.webmd.com/">http://www.webmd.com/</ext-link>
            <ext-link ext-link-type="webcite" xlink:href="6W247u0HQ"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robillard</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>TW</given-names>
            </name>
            <name name-style="western">
              <surname>Hennessey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Beattie</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Illes</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Aging 2.0: health information about dementia on Twitter</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e69861</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0069861"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0069861</pub-id>
          <pub-id pub-id-type="medline">23922827</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-05677</pub-id>
          <pub-id pub-id-type="pmcid">PMC3724927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alvaro</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lofi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Overington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Collier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Crowdsourcing Twitter annotations to identify first-hand experiences of prescription drug use</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58</volume>
          <fpage>280</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00241-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.11.004</pub-id>
          <pub-id pub-id-type="medline">26556646</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00241-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Varol</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Flammini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Menczer</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>BotOrNot: A System to Evaluate Social Bots</article-title>
          <source>Proceedings of the 25th International Conference Companion on World Wide Web</source>
          <year>2016</year>
          <conf-name>WWW'16 Companion</conf-name>
          <conf-date>April 11-15, 2016</conf-date>
          <conf-loc>Montreal, Canada</conf-loc>
          <fpage>273</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1145/2872518.2889302</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hayati</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Potdar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Talevski</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Behaviour-Based Web Spambot Detection by Utilising Action Time and Action Frequency</article-title>
          <source>Proceedings of the International Conference on Computational Science and Its Applications</source>
          <year>2010</year>
          <conf-name>ICCSA'10</conf-name>
          <conf-date>March 23-26, 2010</conf-date>
          <conf-loc>Fukuoka, Japan</conf-loc>
          <fpage>351</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-642-12165-4_28</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
          <pub-id pub-id-type="pii">1478-7954-9-29</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Convolutional Neural Networks for Sentence Classification</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2014</year>
          <conf-name>EMNLP'14</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>1746</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Enriching word vectors with subword information</article-title>
          <source>Trans Assoc Comput Linguist</source>
          <year>2017</year>
          <volume>5</volume>
          <fpage>135</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Maki</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mazurowski</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>A systematic study of the class imbalance problem in convolutional neural networks</article-title>
          <source>Neural Netw</source>
          <year>2018</year>
          <month>10</month>
          <volume>106</volume>
          <fpage>249</fpage>
          <lpage>59</lpage>
          <pub-id pub-id-type="doi">10.1016/j.neunet.2018.07.011</pub-id>
          <pub-id pub-id-type="medline">30092410</pub-id>
          <pub-id pub-id-type="pii">S0893-6080(18)30210-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brodersen</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Stephan</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Buhmann</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>The Balanced Accuracy and Its Posterior Distribution</article-title>
          <source>Proceedings of the 20th International Conference on Pattern Recognition</source>
          <year>2010</year>
          <conf-name>ICPR'10</conf-name>
          <conf-date>August 23-26, 2010</conf-date>
          <conf-loc>Istanbul, Turkey</conf-loc>
          <fpage>3121</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1109/icpr.2010.764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sillence</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Briggs</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fishwick</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Going online for health advice: changes in usage and trust practices over the last five years</article-title>
          <source>Interact Comput</source>
          <year>2007</year>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>397</fpage>
          <lpage>406</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.sciencedirect.com/science/article/pii/S095354380600141X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.intcom.2006.10.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heaivilin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gerbert</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Public health surveillance of dental pain via Twitter</article-title>
          <source>J Dent Res</source>
          <year>2011</year>
          <month>09</month>
          <volume>90</volume>
          <issue>9</issue>
          <fpage>1047</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21768306"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0022034511415273</pub-id>
          <pub-id pub-id-type="medline">21768306</pub-id>
          <pub-id pub-id-type="pii">0022034511415273</pub-id>
          <pub-id pub-id-type="pmcid">PMC3169887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>AlKulaib</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Weaponized health communication: Twitter bots and Russian trolls amplify the vaccine debate</article-title>
          <source>Am J Public Health</source>
          <year>2018</year>
          <month>10</month>
          <volume>108</volume>
          <issue>10</issue>
          <fpage>1378</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.2105/AJPH.2018.304567</pub-id>
          <pub-id pub-id-type="medline">30138075</pub-id>
          <pub-id pub-id-type="pmcid">PMC6137759</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Schuchard</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Crooks</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Examining emergent communities and social bots within the polarized online vaccination debate in Twitter</article-title>
          <source>Soc Media Soc</source>
          <year>2019</year>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>2056305119865465</fpage>
          <pub-id pub-id-type="doi">10.1177/2056305119865465</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mathai</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Allegranzi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kilpatrick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bagheri Nejad</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Graafmans</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Pittet</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Promoting hand hygiene in healthcare through national/subnational campaigns</article-title>
          <source>J Hosp Infect</source>
          <year>2011</year>
          <month>04</month>
          <volume>77</volume>
          <issue>4</issue>
          <fpage>294</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jhin.2010.10.012</pub-id>
          <pub-id pub-id-type="medline">21353722</pub-id>
          <pub-id pub-id-type="pii">S0195-6701(10)00467-6</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
