<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JPH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Public Health Surveill</journal-id>
      <journal-title>JMIR Public Health and Surveillance</journal-title>
      <issn pub-type="epub">2369-2960</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v2i2e158</article-id>
    <article-id pub-id-type="pmid">27751984</article-id>
    <article-id pub-id-type="doi">10.2196/publichealth.5869</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Building a National Neighborhood Dataset From Geotagged Twitter Data for Indicators of Happiness, Diet, and Physical Activity</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Zhang</surname>
          <given-names>Ni</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Zhai</surname>
          <given-names>Haijun</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Jimeno</surname>
          <given-names>Antonio</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>MacKinlay</surname>
          <given-names>Andrew</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Seresinhe</surname>
          <given-names>Chanuki</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Nguyen</surname>
        <given-names>Quynh C</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>

        <institution>Department of Health, Kinesiology, and Recreation</institution>
        <institution>University of Utah College of Health</institution>
        <addr-line>1901 E. South Campus Drive</addr-line>
        <addr-line>Annex Room 2124</addr-line>
        <addr-line>Salt Lake City, UT,</addr-line>
        <country>United States</country>
        <phone>1 801 585 5134</phone>
        <fax>1 801 585 3646</fax>
        <email>quynh.ctn@gmail.com</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-4745-6681</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Li</surname>
          <given-names>Dapeng</given-names>
        </name>
        <degrees>MS, PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3255-6084</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Meng</surname>
          <given-names>Hsien-Wen</given-names>
        </name>
        <degrees>MS</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8952-4286</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Kath</surname>
          <given-names>Suraj</given-names>
        </name>
        <degrees>MS</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4731-5590</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Nsoesie</surname>
          <given-names>Elaine</given-names>
        </name>
        <degrees>MS, PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9170-8714</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6">
        <name name-style="western">
          <surname>Li</surname>
          <given-names>Feifei</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3079-3745</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib7">
        <name name-style="western">
          <surname>Wen</surname>
          <given-names>Ming</given-names>
        </name>
        <degrees>MA, MS, PhD</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6397-3473</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>

    <institution>Department of Health, Kinesiology, and Recreation</institution>  
    <institution>University of Utah College of Health</institution>
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
    <sup>2</sup>

    <institution>Department of Geography</institution>  
    <institution>University of Utah</institution>
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
    <sup>3</sup>

    <institution>School of Computing</institution>  
    <institution>University of Utah</institution>
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <aff id="aff4">
    <sup>4</sup>

    <institution>Department of Global Health</institution>  
    <institution>University of Washington</institution>
    <addr-line>Seattle, WA</addr-line>
    <country>United States</country></aff>
    <aff id="aff5">
    <sup>5</sup>

    <institution>Department of Sociology</institution>  
    <institution>University of Utah</institution>
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Quynh C Nguyen 
      <email>quynh.ctn@gmail.com</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><season>Jul-Dec</season><year>2016</year></pub-date>
    <pub-date pub-type="epub">
      <day>17</day>
      <month>10</month>
      <year>2016</year>
    </pub-date>
    <volume>2</volume>
    <issue>2</issue>
    <elocation-id>e158</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>1</day>
        <month>5</month>
        <year>2016</year>
      </date>
      <date date-type="rev-request">
        <day>27</day>
        <month>7</month>
        <year>2016</year>
      </date>
      <date date-type="rev-recd">
        <day>29</day>
        <month>8</month>
        <year>2016</year>
      </date>
      <date date-type="accepted">
        <day>15</day>
        <month>9</month>
        <year>2016</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Quynh C Nguyen, Dapeng Li, Hsien-Wen Meng, Suraj Kath, Elaine Nsoesie, Feifei Li, Ming Wen. Originally published in JMIR Public Health and Surveillance (http://publichealth.jmir.org), 17.10.2016.</copyright-statement>
    <copyright-year>2016</copyright-year>
    <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Public Health and Surveillance, is properly cited. The complete bibliographic information, a link to the original publication on http://publichealth.jmir.org, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://publichealth.jmir.org/2016/2/e158/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Studies suggest that where people live, play, and work can influence health and well-being. However, the dearth of neighborhood data, especially data that is timely and consistent across geographies, hinders understanding of the effects of neighborhoods on health. Social media data represents a possible new data resource for neighborhood research.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The aim of this study was to build, from geotagged Twitter data, a national neighborhood database with area-level indicators of well-being and health behaviors.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We utilized Twitter’s streaming application programming interface to continuously collect a random 1% subset of publicly available geolocated tweets for 1 year (April 2015 to March 2016). We collected 80 million geotagged tweets from 603,363 unique Twitter users across the contiguous United States. We validated our machine learning algorithms for constructing indicators of happiness, food, and physical activity by comparing predicted values to those generated by human labelers. Geotagged tweets were spatially mapped to the 2010 census tract and zip code areas they fall within, which enabled further assessment of the associations between Twitter-derived neighborhood variables and neighborhood demographic, economic, business, and health characteristics.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>Machine labeled and manually labeled tweets had a high level of accuracy: 78% for happiness, 83% for food, and 85% for physical activity for dichotomized labels with the <italic>F</italic> scores 0.54, 0.86, and 0.90, respectively. About 20% of tweets were classified as happy. Relatively few terms (less than 25) were necessary to characterize the majority of tweets on food and physical activity. Data from over 70,000 census tracts from the United States suggest that census tract factors like percentage African American and economic disadvantage were associated with lower census tract happiness. Urbanicity was related to higher frequency of fast food tweets. Greater numbers of fast food restaurants predicted higher frequency of fast food mentions. Surprisingly, fitness centers and nature parks were only modestly associated with higher frequency of physical activity tweets. Greater state-level happiness, positivity toward physical activity, and positivity toward healthy foods, assessed via tweets, were associated with lower all-cause mortality and prevalence of chronic conditions such as obesity and diabetes and lower physical inactivity and smoking, controlling for state median income, median age, and percentage white non-Hispanic.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>Machine learning algorithms can be built with relatively high accuracy to characterize sentiment, food, and physical activity mentions on social media. Such data can be utilized to construct neighborhood indicators consistently and cost effectively. Access to neighborhood data, in turn, can be leveraged to better understand neighborhood effects and address social determinants of health. We found that neighborhoods with social and economic disadvantage, high urbanicity, and more fast food restaurants may exhibit lower happiness and fewer healthy behaviors.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>social media</kwd>
      <kwd>Twitter messaging</kwd>
      <kwd>health behavior</kwd>
      <kwd>happiness</kwd>
      <kwd>food</kwd>
      <kwd>physical activity</kwd>
    </kwd-group></article-meta>
  </front>
  <body>

    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>There is an increasing recognition that health is determined by a myriad of factors, including where you live, play, and work [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Poor access to healthy food [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref10">10</xref>], abundance of fast food chains [<xref ref-type="bibr" rid="ref11">11</xref>], lack of recreational facilities [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>], and higher crime rates [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] have been shown to predict higher obesity rates. Environmental exposure to toxins, noise, and violence can be detrimental to health [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Conversely, neighborhood resources such as playgrounds for children, grocery stores, and gyms can be beneficial to health [<xref ref-type="bibr" rid="ref17">17</xref>]. Adverse neighborhood conditions converge in poor, minority neighborhoods [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref21">21</xref>], thereby increasing health disparities.</p>
      <p>Social environments can offer social and emotional support that buffers stressful life events [<xref ref-type="bibr" rid="ref22">22</xref>]. Johns and colleagues found that neighborhoods with higher social cohesion had lower posttraumatic stress disorder [<xref ref-type="bibr" rid="ref23">23</xref>]. Higher community happiness levels are linked with lower obesity, hypertension, and suicide rates as well as increased life expectancy [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Evidence also suggests that emotional states such as happiness, optimism, depression, or suicidality can spread through social networks [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. The social environment can offer opportunities for social control in regulating unhealthy behaviors and facilitating the social learning of healthy behaviors but can also promote risky behaviors. Health behaviors, such as food consumption, health screening, smoking, alcohol consumption, drug use, and sleep have also been observed to spread through social networks [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>].</p>
      <p>The extreme scarcity of neighborhood data greatly limits research on neighborhood effects. Some places [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>] have extensive neighborhood data collected on them, but they are the anomaly rather than the rule, and it is difficult to make comparisons across geographies because available measures vary greatly across them. Neighborhood data collection is expensive and time consuming and only available for certain time periods [<xref ref-type="bibr" rid="ref40">40</xref>]. Widespread usage of the Internet and open recording of many transactions (eg, Yelp reviews, Foursquare check-ins, and reporting of personal opinions and behaviors through social media) has led to the availability of massive amounts of data that enable understanding of previously hidden local area interactions. Researchers are increasingly utilizing social media and user-generated data to track health behaviors and perform health surveillance (eg, for outbreak detection) [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. Others have used social media to track sleep issues [<xref ref-type="bibr" rid="ref46">46</xref>], personal health status disclosed by Twitter users [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>], and patient-perceived quality of care [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
      <p>In this study, we explored the utility of building a national neighborhood database from geotagged Twitter data to characterize well-being and health behaviors. We validated our machine learning algorithm for constructing indicators of happiness, food, and physical activity by comparing machine-generated values to values generated by human labelers. In addition, we explored associations between Twitter-derived neighborhood variables and neighborhood demographic and economic characteristics. This project makes significant, relevant contributions to the field because neighborhood environments are increasingly linked to an array of important health outcomes and this project addresses the limits to research resulting from the lack of neighborhood data by providing new, cost-efficient data resources and methods for characterizing neighborhoods. To our knowledge, our study was the first to attempt to create a national neighborhood database from Twitter data, with indicators constructed for public health researchers. The only other type of neighborhood data that is consistently available for local areas is census data on the compositional characteristics of neighborhoods. Twitter is uniquely suited to characterize the social environment, including prevalent sentiment and health behaviors.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Social Media Data Collection</title>
        <p>From February 2015 to March 2016, we utilized Twitter’s streaming application programming interface (API) to continuously collect a random 1% sample of publicly available tweets with latitude and longitude coordinates. Given that neighborhood researchers differ in their use and interest in data at the census tract and zip code level, we constructed neighborhood indicators at both levels thereby increasing the flexibility of our dataset to address the potential data needs of other researchers. In total, we collected 79,848,992 geotagged tweets from 603,363 unique Twitter users in the contiguous United States (including District of Columbia). The median number of tweets per user was 4. Job postings (identified through hashtags #hiring, #jobs, and #job) were removed from the final analytic sample of tweets because these were pervasive and not central to the neighborhood variables we constructed.</p>
      </sec>
      <sec>
        <title>Spatial Join and Neighborhood Definition</title>
        <p>Each geotagged tweet was assigned a corresponding census tract and zip code it falls within, based on the latitude and longitude coordinates of where the tweet was sent. This spatial join procedure was implemented in Python (version 2.7.12; Python Software Foundation), a popular programming language for spatial data processing [<xref ref-type="bibr" rid="ref50">50</xref>]. Specifically, Python libraries were used to read shapefile format vector data (PyShp 1.1.4), build an R-tree index on the polygon data (Rtree 0.8.2), and perform a spatial join operation (Shapely 1.5.12 and Fiona 1.6.1). The R-tree was used to build a spatial index [<xref ref-type="bibr" rid="ref51">51</xref>] on the national census tract and zip code polygon data to speed computation. Tweets that were not assigned a census tract or zip code location included those with destinations bordering the United States (ie, Mexico and Canada). We linked 99.8% of tweets with geocoordinates to their respective 2010 census tract and zip code locations. The term <italic>neighborhood</italic> used in this paper refers to both zip codes and census tracts. We mapped tweets to these two geographic boundaries because they are among the most popular neighborhood definitions utilized by public health researchers [<xref ref-type="bibr" rid="ref52">52</xref>-<xref ref-type="bibr" rid="ref54">54</xref>].</p>
      </sec>
      <sec>
        <title>Processing Tweets</title>
        <p>Duplicate tweets (ie, tweets with the same tweet ID, &#60;1%) were removed computationally. Although Twitter’s API collects a random subset of 1% of publicly available tweets, users (especially spam accounts) who tweet often have potentially greater influence on variable values we construct. We examined outliers in our datasets (defined as the users whose tweets accounted for more than 1% of tweets in our dataset) and eliminated automated accounts and accounts for which the majority of tweets were advertisements. Processing and statistical analysis tasks were performed with Stata MP13 (StataCorp LP).</p>
      </sec>
      <sec>
        <title>Construction of Neighborhood Variables From Twitter Data</title>
        <p>From geotagged tweets, we derived variables that characterize happiness, food, and physical activity. Each tweet was divided into tokens using the Stanford tokenizer [<xref ref-type="bibr" rid="ref55">55</xref>]. For processing of English text, tokens roughly correspond to words. We then built various algorithms utilizing tokens to create variables that characterize happiness and make references to food and physical activity. Below we describe in more detail our algorithms.</p>
      </sec>
      <sec>
        <title>Sentiment Analysis</title>
        <p>To conduct sentiment analysis, we utilized the Machine Learning for Language Toolkit (MALLET; AK McCallum, 2002), a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text. We leveraged the Maximum Entropy text classifier in MALLET to classify tweets as happy and not happy [<xref ref-type="bibr" rid="ref56">56</xref>]. In order to train our classifier, we obtained training sets from the following resources: Sentiment140 [<xref ref-type="bibr" rid="ref57">57</xref>], Sanders Analytics [<xref ref-type="bibr" rid="ref58">58</xref>], and Kaggle [<xref ref-type="bibr" rid="ref59">59</xref>]. We trained our classifier to differentiate between happy and not happy sentiments. We then ran our classifier on our national Twitter data to compute a happy score (range 0-1) for each tweet, where higher happiness scores indicate more positive sentiment. MALLET estimates predicted probabilities that a tweet is happy based upon word-level features. The classifier uses search-based optimization to assign weights that maximize the likelihood of the training data. However, unlike Naïve Bayes, the Maximum Entropy classifier does not assume conditional independence among features.</p>
        <p>To calibrate the generated happiness scores with human generated labels, two raters manually read a random subset of 1200 tweets and assigned a value of 1 to happy tweets and 0 to not happy tweets. The initial interrater reliability was 92%, and discordant values were reviewed until a 100% agreement between raters was reached. To decide on a cut point for MALLET scores at which we would classify tweets as happy, we computed accuracy levels at different cut points of MALLET scores (<xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>). Increasing the MALLET score improves the accuracy against human annotations but also reduces the calculated prevalence of tweets deemed as happy. A MALLET score of 0.80 achieves the highest level of accuracy while still maintaining a prevalence of happy tweets of 19% (which approximates the prevalence obtained by human annotations). Area under the receiver operating characteristic curve is approximately 0.7 for all MALLET cut points between 0.60 and 0.85.</p>
      </sec>
      <sec>
        <title>Food Analysis</title>
        <p>We compiled a list of over 1430 popular food words from the US Department of Agriculture’s National Nutrient Database [<xref ref-type="bibr" rid="ref60">60</xref>]. Each food item was associated with a measure of caloric density, operationalized as calories per 100 grams. Fruits, vegetables, nuts, and lean proteins (ie, fish, chicken, and turkey) were labeled as healthy foods (340 food terms in total). Fried foods were not considered healthy foods. Our food list also contained popular national fast food restaurants such as McDonald’s and Kentucky Fried Chicken (captured via 154 food terms including popular variations of restaurant names) to enable quantification of fast food references. From April 2015 to March 2016, we collected and processed 4,041,521 geotagged food tweets. In the food dataset, the median number of tweets per user was 12 tweets.</p>
        <p>To analyze food culture, each tweet was examined for words or phrases matching those on our list. Each food item on our list was described by one or two words. Our text-matching algorithm first searched over a tweet for matches to two-word foods (eg, orange chicken). It then searched over the remaining words for matches to one-word food terms (eg, taco). We computed caloric density by summing the caloric densities of all the foods mentioned in the tweet. We also created a count of healthy food references and fast food restaurant references for each tweet. Moreover, we leveraged our sentiment analysis to assess sentiment toward food. Specifically, we tracked sentiment around healthy foods and fast food. These variables (any food references, healthy food references, fast food references, caloric density, and sentiment toward healthy foods and fast food) were then aggregated and summarized at the census tract and zip code level to create neighborhood indicators of food culture.</p>
      </sec>
      <sec>
        <title>Physical Activity Analysis</title>
        <p>We created a list of physical activities using published lists of physical activity terms gathered from physical activity questionnaires, compendia of physical activities, and popularly available fitness programs [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>]. Our physical activity list had 376 different activities that incorporate gym-related exercise (eg, treadmill, weight lifting), sports (eg, baseball), recreation (eg, hiking, scuba diving) and household chores (eg, gardening). We excluded popular phrases that generally do not relate to physical activity such as “walk away” and “running late.” Using metabolic equivalents associated with physical activities, we quantified the exercise intensity of each physical activity mention, scaled for a duration of 30 minutes and for a 155-pound individual [<xref ref-type="bibr" rid="ref63">63</xref>], which approximates the weight of an average American adult [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>].</p>
        <p>Upon piloting our algorithm, we identified commonly used phrases or pop culture references that do not involve physical activity (eg, walking dead) which were manually coded and excluded. Moreover, in order to help reduce the possibility that the tweet was about watching rather than actually participating in the physical activity, we excluded the tweet if it contained any of the following terms: “watch,” “watching,” “watches,” “watched,” “attend,” “attending,” “attends,” and “attended.” In reviewing preliminary labeled physical activity data, we found that most tweets (over 90%) pertaining to team sports (eg, baseball, basketball, football, soccer) were about watching games rather than participating in them. Thus, for team sports, we required that the tweet include the words “play,” “playing,” or “played.”</p>
        <p>Our algorithm created the following physical activity variables for each tweet: any physical activity mention, exercise intensity, and sentiment around physical activity. From April 2015 to March 2016, we collected 1,473,976 geotagged physical activity tweets. In the physical activity dataset, the median number of tweets per user was 5 tweets.</p>
      </sec>
      <sec>
        <title>Quality Control Activities</title>
        <p>A total of 5000 tweets have been manually labeled by two of the authors for quality control activities on food and physical activity. The authors manually labeled whether each tweet was food-related (2000), non–food-related (500), physical activity-related (2000), or non–physical activity-related (500). Excellent interrater reliability was achieved with greater than 90% agreement in all categories, and differences were discussed and resolved.</p>
        <p>Among tweets our algorithm had labeled as food-related, 83% were labeled accurately when compared to labels generated by manual categorization. Among tweets our algorithm had labeled as non–food-related, 81% were labeled accurately (ie, both algorithm and human categorizers labeled the tweet as non–food-related). Overall, accuracy for food tweets was 83% and the <italic>F</italic> score was 0.86. It should be noted our algorithm could label a food-related tweet as non–food-related if the food reference was not in our food dictionary. Food items that often carry non–food-related meanings, such as “perch,” have been excluded from our food dictionary. For tweets that had been mislabeled as food-related, common reasons included a food term used as a metaphor, in a pun, or for food advertisement.</p>
        <p>Among tweets our algorithm had labeled as physical activity-related, 82% of them were labeled accurately when compared to labels generated by human categorizers. An accuracy of 97% was found among tweets labeled as non–physical activity-related by our algorithm. The <italic>F</italic> score was 0.90 and the overall accuracy was 85% for physical activity tweets. Typical errors in classification of physical activity tweets included the use of an idiom (eg, running late) and tweets that were about watching sports games rather than playing sports.</p>
        <p>Additionally, we evaluated our algorithm on its ability to identify relevant food and physical activity terms within tweets. To do this, we examined a random subset of tweets that the algorithm had identified as positive for food (n=200) and physical activity (n=200). Here we focused on the accuracy of our algorithm to conduct string detection. We manually read the tweets to verify that manual annotations agreed with the terms detected. For food tweets, 87% of manual annotations matched all detected terms from the algorithm. Errors for nondetection of terms occurred when the tweet included a hashtag that had multiple food terms without spacing (eg, #chocolatebrownie) or when there were misspellings (eg, sandwhich) or when the food was not on the food list. String detection for physical activity-related terms was more accurate with 98% of manual annotations matching detected terms from the algorithm. Errors included the omission of certain terms from the dictionary (eg, cycling) and use of hashtags without spacing of terms (#runrunrun).</p>
        <p>We further evaluated our sentiment analysis activities through Amazon Mechanical Turk (Mturk; Amazon.com Inc, Seattle, WA, USA), an online crowdsourcing marketplace [<xref ref-type="bibr" rid="ref66">66</xref>]. We randomly selected 500 tweets with 50% labeled as happy and 50% as not happy by our algorithm. Then, we created 20 online surveys through random sorting, with each survey consisting of 25 tweets. We asked participants to rate the sentiment of each tweet. All 20 surveys were live on April 1, 2015. Each online survey closed itself when 15 responses had been reached; the last survey closed on April 5, 2015. For each completed survey, 25 cents ($0.25) was deposited into the participant’s Mturk account. A total of 32 participants completed 300 surveys (ie, 15 responses per survey, 20 surveys). Some participants completed multiple surveys rather than just one. Each tweet was then assigned a label of either happy or not happy based on the modal response from Mturkers (participants from Amazon Mturk). We found an accuracy of 69% for happy tweets and 80% for nonhappy tweets when compared to responses from Mturkers. The overall accuracy for sentiment was 78%, with an <italic>F</italic> score of 0.54.</p>
        <p>We additionally compared performance of MALLET with two other sentiment analysis techniques: a popular bag-of-words technique involving the use of a 10,000 word list [<xref ref-type="bibr" rid="ref67">67</xref>] and Sentiment140, a machine-learning classifier [<xref ref-type="bibr" rid="ref68">68</xref>]. Among the 500 control tweets from our LabMT experiment, the bag-of-words algorithm had an accuracy of 73% (<italic>F</italic> score 0.55) and Sentiment140 had an accuracy of 77% (<italic>F</italic> score 0.47).</p>
      </sec>
      <sec>
        <title>Other Publicly Available Neighborhood Data</title>
        <p>To examine how Twitter-derived neighborhood variables relate to more traditional neighborhood variables, we merged our social media dataset with the 2010 Census and 2014 American Community Survey data which comprised the following demographic, household, and economic characteristics: household size, median family income and percent of the following: 65 years and older age group, 10-24 years, male, African American, white, Hispanic, households with relatives (other than spouse and children), households with unmarried partner, single female-headed households, householder living alone, owner-occupied housing, college graduates, unemployed, less than a high school degree and families living in poverty. A census tract was urban if the geographic centroid of the tract was in an area with more than 2500 people; all other tracts were rural. A zip code was defined as urban if the majority (75% or more) of its land area was characterized as urban (ie, containing at least 2500 people).</p>
        <p>Data on business types at the zip code level were obtained from the 2013 US Census Bureau zip code business patterns accessed via American FactFinder [<xref ref-type="bibr" rid="ref69">69</xref>]. The following North American Industry Classification System (NAICS) codes were utilized to categorize businesses: 722410 (drinking places [alcoholic beverages]; these places are also known as bars, taverns, night clubs and primarily serve alcohol and may have limited food services) and 722511 (full-service restaurants; these include, for instance, diners and steakhouses). Fast food was defined by the following NAICS codes: 722513 (limited-service restaurants; these include carryout restaurants, drive-in restaurants, and other fast food restaurants) and 722515 (snack and nonalcoholic beverage bars). We also tracked supermarkets and grocery stores (NAICS code 445110) and convenience stores (NAICS code 445120). To examine associations between Twitter physical activity mentions and presence of recreational facilities, we retrieved business data for the following types of establishments: fitness and recreational sports centers (NAICS code 713940), nature parks (NAICS code 712190), zoos and botanical gardens (NAICS code 712130), golf courses and country clubs (NAICS code 713910), skiing facilities (NAICS code 713920), and bowling centers (NAICS code 713950).</p>
        <p>We obtained state-level health outcome data including age-adjusted death rates due to all-causes and homicides from 2013 National Vital Statistics Reports. Data in this report was based on information from all resident death certificates filed in the 50 states and the District of Columbia. Death certificates are generally completed by funeral directors, attending physicians, medical examiners, and coroners. Age-adjusted death rates expressed per 100,000 population were based on the 2000 US standard population. Causes of death statistics were classified by the International Classification of Diseases, Tenth Revision, and based on the underlying cause of death.</p>
        <p>We obtained age-adjusted prevalences of health risk behaviors and chronic conditions of US adult residents for the 50 states from the 2013 Behavioral Risk Factor Surveillance System (BRFSS), the nation's premier system of health-related telephone surveys. The questionnaires were created by BRFSS state coordinators and Centers for Disease Control and Prevention staff. BRFSS data includes self-reported physical activity, self-rated health, body mass index (BMI, kg/m<sup>2</sup>), and medical diagnoses of chronic conditions aggregated to the state level. Data from a national health survey suggests that BMI estimates derived from self-reported height and weight were lower than those derived from measured height and weight, although BMI differences were generally less than 1.0 kg/m<sup>2</sup> across sex and age groups [<xref ref-type="bibr" rid="ref70">70</xref>]. State-level BRFSS data is publicly available. Smaller area aggregations can require data use agreements. In addition to state-level BRFSS data, we also utilized restricted-access zip-code–level data from the 2009-2014 Utah BRFSS survey to examine zip-code–level health outcomes [<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref72">72</xref>].</p>
      </sec>
      <sec>
        <title>Regression Analyses</title>
        <p>We implemented adjusted linear regression models to examine associations between area-level Twitter characteristics and other area-level characteristics (demographics, business characteristics, and health outcomes). To facilitate interpretation of findings for different variables, we standardized all variables to have a mean of zero and standard deviation of one. We investigated spatial autocorrelation and found that Moran’s I was highest for census tract Twitter happiness (0.12) and less than 0.04 for other Twitter tract and zip code summaries. To account for spatial autocorrelation of area-level values in linear regression analyses, we adjusted standard errors for clustering of census tract and zip code values within a county. Statistical analyses were implemented with Stata MP13 (StataCorp LP) and ArcGIS Desktop version 10.1-10.3 (Esri).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table1">Table 1</xref> displays descriptive statistics. Approximately 20% of tweets were happy. About 5.1% of tweets were about food and 1.8% were about physical activity. The mean and median caloric density of food references were 239 and 209 calories per 100 grams, respectively. Tweets about healthy food were happier than tweets about fast food (28.3% vs 14.5%; <italic>P</italic>&#60;.001). The mean and median exercise intensity of physical activity mentions (assuming 30 minutes for a 155-pound person) were 199 and 130 calories, respectively.</p>
      <p><xref ref-type="fig" rid="figure1">Figure 1</xref> presents the spatial distribution of happy tweets by census tract, highlighting variation across the United States. <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref> presents the spatial distribution of happy tweets by zip code. The proportion of happy tweets was highest in the following states: Montana, Tennessee, Utah, New Hampshire, Arkansas, Maine, Colorado, and New York (<xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>). By contrast, the proportions of happy tweets were lowest for the following states: Louisiana, North Dakota, Oregon, Maryland, Texas, Delaware, West Virginia, and Ohio (<xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>).</p>
      <p><xref ref-type="table" rid="table2">Table 2</xref> presents the results of adjusted linear regression analyses examining the associations between population characteristics and Twitter-derived characteristics at the census tract level (percent of tweets that were happy, percent of tweets about healthy food, percent of tweets about fast food, and percent of tweets about physical activity). Census tract characteristics like percent African American (beta coefficient, B=−.11), greater household size (B=−.18), and economic disadvantage (B=−.19) were related to lower tract happiness. Economic disadvantage was negatively related to healthy food tweets (B=−.09), fast food tweets (B=−.09), and physical activity tweets (B=−.03). Urbanicity was strongly related to higher frequency of fast food tweets (B=.29). Greater household size was related to both lower healthy food tweets (B=−.11) and fast food tweets (B=−.07).</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>National distribution of happy tweets, by census tract. Geotagged tweets were spatially joined to their 2010 census tract locations and sentiment scores were computed. This color coded map presents the proportion of happy tweets in each census tract, with darker colors signifying higher proportions of happy tweets.</p>
        </caption>
        <graphic xlink:href="publichealth_v2i2e158_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Descriptive statistics of our national Twitter database, April 2015 to March 2016 (N=79,848,992).</p>
        </caption>
        <table width="510" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="10"/>
          <col width="300"/>
          <col width="200"/>
          <thead>
            <tr valign="top">
              <td> </td>
              <td><break/></td>
              <td>Mean (SD) <break/></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="3"><bold>Happiness</bold></td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Tweets that are happy</td>
              <td>19.9 (6.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="3"><bold>Food culture</bold></td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Tweets about food</td>
              <td>5.1 (22.0)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Food tweets about healthy foods</td>
              <td>15.9 (36.6)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Food tweets about fast food</td>
              <td>9.2 (29.0)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>Caloric density of food tweets (per 100 grams)</td>
              <td>238.5 (219.8)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Food tweets that are happy</td>
              <td>27.0 (44.4)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Healthy food tweets that are happy</td>
              <td>28.3 (45.0)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Fast food tweets that are happy</td>
              <td>14.5 (35.2)</td>
            </tr>
            <tr valign="top">
              <td colspan="3"><bold>Physical activity culture</bold></td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Tweets about physical activity</td>
              <td>1.8 (13.3)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>Exercise intensity (per 30 minutes)</td>
              <td>199.1 (117.5)</td>
            </tr>
            <tr valign="top">
              <td><break/></td>
              <td>% Physical activity tweets that are happy</td>
              <td>28.2 (45.0)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Demographic and economic predictors of happy, food, and physical activity tweets from 70,515 census tracts (data source: 2010 US Census data).</p>
        </caption>
        <table width="620" cellpadding="5" cellspacing="0" border="0" rules="groups" frame="hsides">

            <col width="100"/>
            <col width="80"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>

          <thead>
            <tr valign="top">
              <td>Tract characteristics</td>
              <td>% happy tweets <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% healthy food tweets <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% fast food tweets <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% physical activity tweets <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Urban (yes)</td>
              <td>−.01 <break/>(−.04 to .03)</td>
              <td>.79</td>
              <td>.01 <break/>(−.02 to .03)</td>
              <td>.54</td>
              <td>.29 <break/>(.26 to .31)</td>
              <td>&#60;.001</td>
              <td>−.02 <break/>(−.03 to −.01)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Population density</td>
              <td>.06 <break/>(.03 to .08)</td>
              <td>&#60;.001</td>
              <td>.04 <break/>(.02 to .07)</td>
              <td>.001</td>
              <td>−.03 <break/>(−.03 to −.02)</td>
              <td>&#60;.001</td>
              <td>.00 <break/>(−.01 to .00)</td>
              <td>.82</td>
            </tr>
            <tr valign="top">
              <td>% 65 years and older</td>
              <td>.02 <break/>(−.01 to .04)</td>
              <td>.09</td>
              <td>−.03 <break/>(−.04 to −.02)</td>
              <td>&#60;.001</td>
              <td>−.03 <break/>(−.04 to −.01)</td>
              <td>&#60;.001</td>
              <td>.02 <break/>(.02 to .03)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>% 10-24 years</td>
              <td>−.02 <break/>(−.04 to .00)</td>
              <td>.01</td>
              <td>−.05 <break/>(−.05 to −.04)</td>
              <td>&#60;.001</td>
              <td>.00 <break/>(−.01 to .01)</td>
              <td>.49</td>
              <td>.00 <break/>(−.01 to .00)</td>
              <td>.14</td>
            </tr>
            <tr valign="top">
              <td>% Male</td>
              <td>.04 <break/>(.03 to .06)</td>
              <td>&#60;.001</td>
              <td>.01 <break/>(.00 to .02)</td>
              <td>.21</td>
              <td>−.05 <break/>(−.06 to −.04)</td>
              <td>&#60;.001</td>
              <td>.01 <break/>(.01 to .02)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>% African American</td>
              <td>−.11 <break/>(−.14 to −.07)</td>
              <td>&#60;.001</td>
              <td>−.03 <break/>(−.04 to −.01)</td>
              <td>&#60;.001</td>
              <td>−.03 <break/>(−.04 to −.02)</td>
              <td>&#60;.001</td>
              <td>−.01 <break/>(−.02 to −.01)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>% Hispanic</td>
              <td>−.04 <break/>(−.08 to .00)</td>
              <td>.05</td>
              <td>.02 <break/>(.01 to .03)</td>
              <td>.00</td>
              <td>.07 <break/>(.05 to .09)</td>
              <td>&#60;.001</td>
              <td>.00 <break/>(.00 to .00)</td>
              <td>.77</td>
            </tr>
            <tr valign="top">
              <td>Household size</td>
              <td>−.18 <break/>(−.20 to −.15)</td>
              <td>&#60;.001</td>
              <td>−.11 <break/>(−.12 to −.09)</td>
              <td>&#60;.001</td>
              <td>−.07 <break/>(−.09 to −.05)</td>
              <td>&#60;.001</td>
              <td>−.01 <break/>(−.01 to −.01)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Economic disadvantage<sup>b</sup></td>
              <td>−.19 <break/>(−.21 to −.16)</td>
              <td>&#60;.001</td>
              <td>−.09 <break/>(−.10 to −.08)</td>
              <td>&#60;.001</td>
              <td>−.09 <break/>(−.10 to −.07)</td>
              <td>&#60;.001</td>
              <td>−.03 <break/>(−.04 to −.03)</td>
              <td>&#60;.001</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>Adjusted linear regression included all tract demographic and economic predictors simultaneously. Standard errors accounted for clustering at the county level.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>Economic disadvantage factor score derived from the following census tract characteristics: percent female-headed households, percent families living in poverty, unemployment rate, percent college graduates (reverse coded), and median family income (reverse coded).</p>
          </fn>

        </table-wrap-foot>
      </table-wrap>
      <p>Sensitivity analyses were performed to examine the relationship between population characteristics and happiness for a different unit of aggregation: zip code areas. Relationships seen at the census tract level were similar to those at the zip code level, although they were more muted at the zip code level (not shown). This may be the case because census tracts are designed to be relatively homogenous with regard to characteristics such as economic status and demographic characteristics [<xref ref-type="bibr" rid="ref73">73</xref>].</p>
      <p>Healthy foods (ie, vegetables, fruits, nuts, lean proteins) composed 15.9% of food tweets, while fast food restaurant mentions composed 9.2% of food tweets. The most popular foods include coffee, beer, pizza, wine, chicken, ice cream, and sushi (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Popular healthy food terms included chicken, eggs, salad, turkey, and banana (<xref ref-type="fig" rid="figure3">Figure 3</xref>). Starbucks was the most popular fast food place mentioned (accounting for 46% of all fast food restaurant mentions), followed by Chipotle (9.2%), Taco Bell (5.4%), and Buffalo Wild Wings (5.2%). We additionally examined the relationship between food tweets and business characteristics. At the zip code level, greater numbers of fast food restaurants were associated with more fast food tweets (B=.15), and higher caloric density of food mentions (B=.08). Urban areas had tweets with higher caloric density (B=.08) and more fast food restaurant mentions (B=.16). Happy tweets were more prevalent in zip codes with higher numbers of businesses (B=.11) and full-service restaurants (B=.16). Higher numbers of fast food restaurants (B=−.16) and convenience stores (B=−.07) were related to fewer happy tweets (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
      <p>Additionally, relatively few physical activity terms (13 terms) accounted for 75% of physical activity tweets (<xref ref-type="fig" rid="figure4">Figure 4</xref>) although our data collection system was set up to collect tweets on 376 physical activity terms. The most popular terms included walking, dancing, and running. At the zip code level, greater numbers of fitness and recreational sports centers were related to higher exercise intensity (B=.05) and happier tweets (B=.07). Surprisingly, the presence of nature parks was not associated with physical activity mentions. Urbanicity was associated with lower frequency of physical activity tweets and happy tweets but higher exercise intensity (<xref ref-type="table" rid="table4">Table 4</xref>). In supplemental analyses, we examined information on number of miles covered during physical activity if that was mentioned in the tweet (n=36,291; median 3.1 miles). Even fewer tweets contained information on amount of time the person engaged in physical activity. Among 5823 tweets that mentioned hour(s) of physical activity, the median amount was 2 hours. Among 2402 tweets that only referred to minutes of physical activity, the median number of minutes was 20.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Zip code and business characteristics as predictors of food tweets and happiness (data sources: 2013 zip code business patterns and 2010 US Census data).</p>
        </caption>
        <table width="624" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">

            <col width="100"/>
            <col width="85"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>

          <thead>
            <tr valign="top">
              <td>Zip code characteristics</td>
              <td>Average caloric density of food tweets <break/>n=21,756 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% fast food tweets <break/>n=21,756 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% happy tweets <break/>n=26,584 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Urban (yes)</td>
              <td>.08 (.05 to .11)</td>
              <td>&#60;.001</td>
              <td>.16 (.12 to .20)</td>
              <td>&#60;.001</td>
              <td>−.02 (−.06 to .02)</td>
              <td>.29</td>
            </tr>
            <tr valign="top">
              <td>Population density</td>
              <td>.00 (.00 to .01)</td>
              <td>.24</td>
              <td>.00 (−.01 to .01)</td>
              <td>.86</td>
              <td>.01 (.00 to .03)</td>
              <td>.18</td>
            </tr>
            <tr valign="top">
              <td>Number of businesses</td>
              <td>−.01 (−.02 to .01)</td>
              <td>.34</td>
              <td>.02 (.00 to .04)</td>
              <td>.04</td>
              <td>.11 (.08 to .15)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Businesses that sell alcohol</td>
              <td>−.03 (−.04 to −.02)</td>
              <td>&#60;.001</td>
              <td>−.04 (−.05 to −.04)</td>
              <td>&#60;.001</td>
              <td>−.01 (−.02 to .00)</td>
              <td>.02</td>
            </tr>
            <tr valign="top">
              <td>Full service restaurants</td>
              <td>−.04 (−.06 to −.02)</td>
              <td>&#60;.001</td>
              <td>.01 (−.01 to .03)</td>
              <td>.43</td>
              <td>.16 (.13 to .20)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Fast food restaurants</td>
              <td>.08 (.06 to .10)</td>
              <td>&#60;.001</td>
              <td>.15 (.13 to .17)</td>
              <td>&#60;.001</td>
              <td>−.16 (−.20 to −.12)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Grocery stores</td>
              <td>.01 (.00 to .01)</td>
              <td>.28</td>
              <td>−.04 (−.05 to −.03)</td>
              <td>&#60;.001</td>
              <td>−.02 (−.04 to .00)</td>
              <td>.05</td>
            </tr>
            <tr valign="top">
              <td>Convenience stores</td>
              <td>.02 (.01 to .02)</td>
              <td>&#60;.001</td>
              <td>−.03 (−.04 to −.02)</td>
              <td>&#60;.001</td>
              <td>−.07 (−.08 to −.05)</td>
              <td>&#60;.001</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>Adjusted linear regression included all zip code and business characteristics simultaneously. Standard errors accounted for clustering at the county level.</p>
          </fn>

        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Zip code and business characteristics as predictors of physical activity tweets and happiness (data sources: 2013 zip code business patterns and 2010 US Census data).</p>
        </caption>
        <table width="624" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">

            <col width="100"/>
            <col width="85"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>
          <thead>
            <tr valign="top">
              <td>Zip code characteristics</td>
              <td>% physical activity tweets <break/>n=26,839 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>Exercise intensity <break/>n=20,715 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% happy tweets <break/>n=26,839 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Urban (yes)</td>
              <td>−.09 (−.11 to −.07)</td>
              <td>&#60;.001</td>
              <td>.07 (.04 to .11)</td>
              <td>&#60;.001</td>
              <td>−.08 (−.12 to −.04)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Population density</td>
              <td>−.01 (−.02 to .00)</td>
              <td>.01</td>
              <td>−.01 (−.01 to .00)</td>
              <td>.03</td>
              <td>.01 (.00 to .02)</td>
              <td>.08</td>
            </tr>
            <tr valign="top">
              <td>Fitness/recreational centers</td>
              <td>.01 (.00 to .02)</td>
              <td>.003</td>
              <td>.05 (.04 to .06)</td>
              <td>&#60;.001</td>
              <td>.07 (.06 to .08)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Nature parks</td>
              <td>.01 (.00 to .02)</td>
              <td>.05</td>
              <td>−.01 (−.01 to .00)</td>
              <td>.21</td>
              <td>.03 (.02 to .04)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Zoos/botanical gardens</td>
              <td>.00 (.00 to .01)</td>
              <td>.19</td>
              <td>.00 (−.01 to .00)</td>
              <td>.35</td>
              <td>.02 (.01 to .03)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Golf/country clubs</td>
              <td>.03 (.02 to .03)</td>
              <td>&#60;.001</td>
              <td>−.05 (−.06 to −.04)</td>
              <td>&#60;.001</td>
              <td>.03 (.02 to .04)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Skiing facilities</td>
              <td>.04 (.04 to .05)</td>
              <td>&#60;.001</td>
              <td>.02 (.02 to .03)</td>
              <td>&#60;.001</td>
              <td>.03 (.02 to .03)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>Bowling centers</td>
              <td>−.01 (−.02 to −.01)</td>
              <td>&#60;.001</td>
              <td>−.01 (−.02 to .00)</td>
              <td>.01</td>
              <td>−.02 (−.03 to −.01)</td>
              <td>&#60;.001</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table4fn1">
            <p><sup>a</sup>Adjusted linear regression included all zip code and business characteristics simultaneously. Standard errors accounted for clustering at the county level.</p>
          </fn>

        </table-wrap-foot>
      </table-wrap>

      
 <table-wrap position="float" id="table5">
        <label>Table 5</label>
        <caption>
          <p>Twitter happiness as a predictor of health outcomes in 232 zip codes in Utah (data source: Utah Behavioral Risk Factor Surveillance System [BRFSS] survey 2009-2014. BRFSS underwent design feature changes. Life dissatisfaction values were only available for 2009 and 2010. All other variables were averages from available data from 2011-2014).</p>
        </caption>
        <table width="600" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">

            <col width="300"/>
            <col width="200"/>
            <col width="100"/>

          <thead>
            <tr valign="top">
              <td>Zip code health outcomes</td>
              <td>Beta (95% CI)<sup>a</sup><break/>n=232</td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Life dissatisfaction</td>
              <td>.01 (−.13 to .15)</td>
              <td>.91</td>
            </tr>
            <tr valign="top">
              <td>Self-rated health (higher score=worse health)</td>
              <td>−.08 (−.21 to .05)</td>
              <td>.21</td>
            </tr>
            <tr valign="top">
              <td>Any past month physical activity/exercise</td>
              <td>.13 (.00 to .26)</td>
              <td>.05</td>
            </tr>
            <tr valign="top">
              <td>Body mass index (kg/m<sup>2</sup>)</td>
              <td>−.13 (−.26 to −.01)</td>
              <td>.04</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table5fn1">
            <p><sup>a</sup>Separate linear regression models for each zip code health outcome.</p>
          </fn>

        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table6">
        <label>Table 6</label>
        <caption>
          <p>State level Twitter sentiment predictors of health outcomes (N=49 states in the contiguous United States plus District of Columbia. Data sources: 2013 National Vital Statistics Reports and 2013 Behavioral Risk Factor Surveillance System [BRFSS] survey on adults).</p>
        </caption>
        <table width="623" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">

            <col width="120"/>
            <col width="90"/>
            <col width="45"/>
            <col width="90"/>
            <col width="45"/>
            <col width="90"/>
            <col width="45"/>

          <thead>
            <tr valign="top">
              <td><break/></td>
              <td colspan="6" align="center">Twitter predictor variables</td>

            </tr>
            <tr valign="top">
              <td>State-level adult health outcomes</td>
              <td>Happiness <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>Positive sentiment toward healthy foods <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>Positive sentiment toward physical activity <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>All-cause mortality per 100,000</td>
              <td>−32.34 <break/>(−61.59 to −3.09)</td>
              <td>.03</td>
              <td>−23.51 <break/>(−40.54 to −6.48)</td>
              <td>.01</td>
              <td>−25.37 <break/>(−42.00 to −8.74)</td>
              <td>.004</td>
            </tr>
            <tr valign="top">
              <td>Homicide per 100,000</td>
              <td>−1.02 (−1.98 to −.06)</td>
              <td>.03</td>
              <td>−.76 (−1.28 to −.25)</td>
              <td>.01</td>
              <td>−.75 (−1.28 to −.23)</td>
              <td>.01</td>
            </tr>
            <tr valign="top">
              <td>% With diabetes</td>
              <td>−.58 (−1.05 to −.12)</td>
              <td>.02</td>
              <td>−.52 (−.78 to −.27)</td>
              <td>&#60;.001</td>
              <td>−.41 (−.68 to −.14)</td>
              <td>.004</td>
            </tr>
            <tr valign="top">
              <td>% With obesity</td>
              <td>−2.27 (−3.35 to −1.18)</td>
              <td>&#60;.001</td>
              <td>−1.67 (−2.25 to −1.09)</td>
              <td>&#60;.001</td>
              <td>−1.43 (−2.05 to −.80)</td>
              <td>&#60;.001</td>
            </tr>
            <tr valign="top">
              <td>% Poor/fair self-rated health</td>
              <td>−1.13 (−2.13 to −.13)</td>
              <td>.03</td>
              <td>−.77 (−1.36 to −.19)</td>
              <td>.01</td>
              <td>−.61 (−1.21 to −.02)</td>
              <td>.05</td>
            </tr>
            <tr valign="top">
              <td>% With high cholesterol</td>
              <td>−.78 (−1.66 to .11)</td>
              <td>.08</td>
              <td>−.51 (−1.04 to .01)</td>
              <td>.06</td>
              <td>−.75 (−1.25 to −.26)</td>
              <td>.003</td>
            </tr>
            <tr valign="top">
              <td>% Physical inactivity</td>
              <td>−2.46 (−4.80 to −.12)</td>
              <td>.04</td>
              <td>−2.32 (−3.61 to −1.03)</td>
              <td>.001</td>
              <td>−1.59 (−2.97 to −.22)</td>
              <td>.02</td>
            </tr>
            <tr valign="top">
              <td>% Current smoking</td>
              <td>−1.47 (−2.68 to −.27)</td>
              <td>.02</td>
              <td>−1.20 (−1.88 to −.52)</td>
              <td>.001</td>
              <td>−1.14 (−1.82 to −.45)</td>
              <td>.002</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table6fn1">
            <p><sup>a</sup>Each cell in the table represents the coefficient estimate of the predictor variable (given by the column) on the state-level health outcome (given by the row). Adjusted linear regression models controlled for state-level demographics: median age, % non-Hispanic white, median household income.</p>
          </fn>

        </table-wrap-foot>
      </table-wrap>
      <p>Additionally, merging in health-related datasets, we examined associations between our Twitter-based variables and other measures of health and well-being. Utilizing data from the 2009-2014 BRFSS in Utah, we found that zip codes in Utah with higher Twitter happiness scores were associated with lower body mass index and higher physical activity (<xref ref-type="table" rid="table5">Table 5</xref>). However, Twitter happiness scores were not statistically significantly related to self-rated health or life satisfaction.</p>
      <p>Greater state-level happiness, as indicated by tweets, was related to lower prevalence of obesity; a one standard deviation increase in happiness was associated with two percentage points lower prevalence in obesity. Greater positive sentiment for healthy foods was related to lower prevalence of diabetes and obesity and lower percent of the population who are physically inactive or current smokers (<xref ref-type="table" rid="table6">Table 6</xref>). Positive sentiment toward physical activity was related to lower obesity.</p>
      <p><xref ref-type="table" rid="table7">Table 7</xref> presents adjusted regression results for additional Twitter-derived variables (percentage of food tweets about healthy foods, percentage of food tweets about fast food, and percentage of tweets about physical activity) and a select number of state health outcomes. Out of the three Twitter-derived variables, percentage of tweets about physical activity was the strongest and most consistent predictor; more online discussion about physical activity was related to lower all-cause mortality and lower prevalence of obesity and fair/poor self-rated health.</p>
      <table-wrap position="float" id="table7">
        <label>Table 7</label>
        <caption>
          <p>State-level Twitter food and physical activity characteristics as predictors of health outcomes (N=49: the 48 states in the contiguous United States plus the District of Columbia. Data sources: 2013 National Vital Statistics Reports and 2013 Behavioral Risk Factor Surveillance System [BRFSS] survey on adults).</p>
        </caption>
        <table width="623" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">

            <col width="110"/>
            <col width="100"/>
            <col width="45"/>
            <col width="90"/>
            <col width="45"/>
            <col width="90"/>
            <col width="45"/>
          <thead>
            <tr valign="top">
              <td/>
              <td colspan="6" align="center">State-level adult health outcomes</td>
            </tr>
            <tr valign="top">
              <td> Twitter predictors</td>
              <td>All-cause mortality per 100,000 <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% with obesity <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
              <td>% poor/fair self-rated health <break/>Beta (95% CI)<sup>a</sup></td>
              <td><italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>% Of food tweets about healthy food</td>
              <td>11.74 (−6.48 to 29.96)</td>
              <td>.20</td>
              <td>−.09 (−.64 to .45)</td>
              <td>.73</td>
              <td>.11 (−.48 to .70)</td>
              <td>.71</td>
            </tr>
            <tr valign="top">
              <td>% Of food tweets about fast food</td>
              <td>9.84 (−8.56 to 28.25)</td>
              <td>.29</td>
              <td>.68 (.13 to 1.23)</td>
              <td>.02</td>
              <td>.77 (.18 to 1.37)</td>
              <td>.01</td>
            </tr>
            <tr valign="top">
              <td>% Of tweets about physical activity</td>
              <td>−28.17 (−46.68 to −9.65)</td>
              <td>.004</td>
              <td>−1.86 (−2.41 to −1.31)</td>
              <td>&#60;.001</td>
              <td>−.89 (−1.49 to −.29)</td>
              <td>.01</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table7fn1">
            <p><sup>a</sup>Adjusted linear regression models were run separately for each state-level health outcome (column) and included all three predictors (row) simultaneously in addition to the following state-level control variables: median age, % non-Hispanic white, median household income. Beta coefficient represents a change in the outcome for every standard deviation change in the predictor (row variable).</p>
          </fn>

        </table-wrap-foot>
      </table-wrap>

   
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>Items in the top 50% of food tweets.</p>
        </caption>
        <graphic xlink:href="publichealth_v2i2e158_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Items in the top 50% of healthy food tweets.</p>
        </caption>
        <graphic xlink:href="publichealth_v2i2e158_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>Items in the top 75% of physical activity tweets.</p>
        </caption>
        <graphic xlink:href="publichealth_v2i2e158_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this paper, we detail the building of a new national neighborhood data repository constructed from Twitter data which addresses a pressing need for neighborhood data that are available across large geographies and can be updated efficiently and cost-effectively. We demonstrate that simple machine learning algorithms for the construction of indicators for happiness, food, and physical activity can agree extremely well with manually generated labels. About one-fifth of tweets were identified as happy. There was substantial spatial variation in happiness across the United States. For instance, the proportion of tweets that were happy in Montana (the most happy state) was 10% greater than in Louisiana (the least happy state). Only a few terms are needed to capture the majority of tweets on food and physical activity. Economic disadvantage, urbanicity, and presence of fast food restaurants predicted lower area-level happiness and lower frequency of healthy behavior mentions on Twitter. Moreover, we find that Twitter area-level characteristics are correlated with area-level health outcomes relating to health behaviors, chronic diseases, mortality, and self-rated health.</p>
      </sec>
      <sec>
        <title>Study Findings in Context</title>
        <p>Social media represents an important new data resource that is increasingly being harnessed for public health efforts such as surveillance of smoking behavior and sentiment toward tobacco products [<xref ref-type="bibr" rid="ref74">74</xref>]. However, few studies are leveraging social media data for the investigation of local area characteristics. More commonly, studies utilizing social media data examine patterns at the city, county, or state level [<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref75">75</xref>] rather than at finer levels of aggregation, which is necessary for understanding the potential impacts of neighborhood conditions.</p>
        <p>Neighborhoods can impact health through a myriad of pathways. Disadvantaged neighborhoods may have fewer resources that support physical activity and healthy diets. Poor and minority neighborhoods have fewer large supermarkets (where healthy foods are more abundant and affordable) compared to wealthy and majority white neighborhoods. Studies have documented increased fruit and vegetable consumption with more supermarket availability [<xref ref-type="bibr" rid="ref17">17</xref>]. Poor neighborhoods, which have been labeled food deserts, also tend to have more fast food restaurants, which can contribute to weight gain [<xref ref-type="bibr" rid="ref6">6</xref>]. In this study, we found that higher numbers of fast food restaurants were associated with higher frequency of fast food mentions, lower frequency of healthy food mentions, and less positive sentiment about healthy foods on Twitter. Our results align with a recent study analyzing Instagram posts, which found that posts originating from census tracts deemed as food deserts contained fewer mentions of fruits and vegetables compared to Instagram posts outside food deserts [<xref ref-type="bibr" rid="ref76">76</xref>]. Additionally, neighborhoods may promote poor health through psychosocial pathways. Living in neighborhoods that are unclean, noisy, and violent can be psychologically harmful through over-activation of the stress response [<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref78">78</xref>].</p>
        <p>We found that economic disadvantage was related to lower frequency of happy tweets. Previous research by Mitchell and colleagues found that higher socioeconomic status was associated with higher Twitter happiness scores at the city level. Moreover, they identified mild correlations (r=−0.34) between happiness and obesity rates for 190 metropolitan statistical areas [<xref ref-type="bibr" rid="ref67">67</xref>] and found that Twitter happiness scores were moderately correlated with other state-level indicators of well-being including shootings, the Peace index, America’s Health Ranking, and the Gallup-Healthways Well-Being Index (correlations ranged between 0.51 and 0.64) [<xref ref-type="bibr" rid="ref67">67</xref>].</p>
      </sec>
      <sec>
        <title>Study Strengths and Limitations</title>
        <p>In this paper, we describe the creation of a new neighborhood data repository constructed from Twitter data and merged with publicly available administrative datasets. However, this study is subject to several limitations. For instance, users of social media tend to be younger; in 2014, 37% of individuals aged 18 to 29 years old used Twitter compared to 12% of individuals aged 50 to 64 years and 10% among those 65 years and older. Nonetheless, adoption rates of social media have been steadily increasing [<xref ref-type="bibr" rid="ref79">79</xref>]. Tweets also include information rarely found in other neighborhood sources. Twitter users are composed of individuals as well as groups of individuals, organizations, companies, and news outlets. Thus, compiling such information may allow for a more comprehensive examination of the social environment.</p>
        <p>Moreover, we are only collecting a subset of publicly available tweets, and thus conclusions from our analytic sample may not generalize to the full population of tweets [<xref ref-type="bibr" rid="ref80">80</xref>]. Our construction of neighborhood indicators from Twitter data necessitated that we restrict our data collection to geolocated tweets. We utilized Twitter’s API which allows the retrieval of a maximum resulting volume of 1% of the total tweets at any given time point. Previous studies suggest that about 1% to 2% of tweets may contain global positioning system location information [<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref82">82</xref>] and that use of Twitter’s streaming API may obtain 40% to 90% of all geotagged tweets [<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref82">82</xref>]. Tweets with location information may be different from those without. For example, tweets in which users share their locations may be more likely to contain public and social activities such as friends tweeting from a restaurant or an event. However, in sensitivity analyses with a subset of control tweets (n=138,152 tweets) collected from July 9 to July 14, 2015, we did not detect any statistically significant differences in happiness scores between tweets with and without geographic coordinates (not shown).</p>
        <p>In creating our neighborhood indicators from Twitter data, we prioritized transparency and ease of implementation so that other researchers can replicate our algorithms. Our sentiment algorithm was trained to differentiate between happy and not happy sentiments (which encompasses neutral and sad sentiments). Thus, we were not able to specifically examine the prevalence of sad tweets, which may provide additional useful information about the well-being of communities. In future work, we plan to target the identification of sadness. Our algorithms for food and physical activity implemented corpus-based classification with steps that are easily understandable. However, this technique does not take into account the full context of a tweet, such as sarcasm or humor, challenges that still evade most natural language processing algorithms, though some studies show promising results [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref84">84</xref>]. Our analysis of caloric density of food assumed calories per 100 grams. Most tweets do not specify the exact amount of food consumed, and thus our estimate is just an approximation.</p>
        <p>Additionally, the content of tweets reflects the type of information that people feel comfortable reporting and may not represent the true spectrum of their feelings or their experiences. For instance, people may feel most comfortable presenting a neutral stance rather than voicing polarizing viewpoints. Certain foods (cupcakes) may get tweeted more often than others (celery). Additionally, we cannot be certain that the food that was tweeted was indeed consumed. Similarly, physical activity tweets may reflect a mixture of intentions, plans, and actual engagement in those physical activities. Also, exercise intensity for physical activities was assessed for 30 minutes of physical activity for an individual weighing 155 pounds, which can be an under- or overestimation depending on the type of activity and persons engaged in that activity.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The epidemic rise in obesity and related chronic diseases in recent decades signals the importance of structural forces and social processes, but the dearth of data on contextual factors limits the investigation of multilevel effects on health. Social media data can be uniquely harnessed to capture social and cultural processes with potential impacts on health [<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref85">85</xref>-<xref ref-type="bibr" rid="ref89">89</xref>]. For instance, public posts can be utilized to measure prevalent happiness which can impact health through emotional contagion and the interconnectedness between mental health and physical health. Additionally, public posts about health behaviors may help us understand the prevalence of those behaviors as well as local area social norms. We demonstrate that tweets can provide a means to assess prevalent sentiment and food behaviors and physical activity, which can inform health interventions and policies to meet the needs of different neighborhoods. In particular, as this study suggests, neighborhoods with social and economic disadvantage, high urbanicity, and those with more fast food restaurants may exhibit lower happiness and fewer healthy behaviors.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Varying MALLET cut points for happy tweets and comparisons with manually generated labels.</p>
        <media xlink:href="publichealth_v2i2e158_app1.pdf" xlink:title="PDF File (Adobe PDF File), 14KB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>National distribution of happy tweets, by zip code. Geotagged tweets were spatially joined to their 2010 zip code locations and sentiment scores were computed. This color coded map presents the proportion of happy tweets in each zip code area, with darker colors signifying higher proportions of happy tweets.</p>
        <media xlink:href="publichealth_v2i2e158_app2.jpg" xlink:title="JPG File, 6MB"/>
      </app>
      <app id="app3">
        <title>Multimedia Appendix 3</title>
        <p>Proportion of happy tweets, by state.</p>
        <media xlink:href="publichealth_v2i2e158_app3.pdf" xlink:title="PDF File (Adobe PDF File), 18KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BRFSS</term>
          <def>
            <p>Behavioral Risk Factor Surveillance System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">MALLET</term>
          <def>
            <p>Machine Learning for Language Toolkit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">Mturk</term>
          <def>
            <p>Mechanical Turk</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NAICS</term>
          <def>
            <p>North American Industry Classification System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by a National Institutes of Health grant (5K01ES025433) to Dr Nguyen. The research uses data from the Utah BRFSS survey, which is implemented by the Utah Department of Health in conjunction with the US Centers for Disease Control and Prevention. We thank Patsaporn Kanokvimankul for her assistance with locating some of the external health outcomes data for this paper. We thank Drs Jared B Hawkins and John S Brownstein for their assistance with quality control activities associated with the Twitter data.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
        <source>Social determinants of health</source>  
        <year>2016</year>  
        <month>01</month>  
        <day>30</day>  
        <access-date>2016-09-28</access-date>
        <publisher-loc>Washington, DC</publisher-loc>
        <publisher-name>US Department of Health and Human Services</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.healthypeople.gov/2020/topics-objectives/topic/social-determinants-of-health">https://www.healthypeople.gov/2020/topics-objectives/topic/social-determinants-of-health</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ks1l76Df"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Marmot</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Closing the gap in a generation: health equity through action on the social determinants of health</article-title>
        <source>Lancet</source>  
        <year>2008</year>  
        <month>11</month>  
        <day>08</day>  
        <volume>372</volume>  
        <issue>9650</issue>  
        <fpage>1661</fpage>  
        <lpage>1669</lpage> </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Villanueva</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>People living in hilly residential areas in metropolitan Perth have less diabetes: spurious association or important environmental determinant?</article-title>
        <source>Int J Health Geogr</source>  
        <year>2013</year>  
        <volume>12</volume>  
        <issue>1</issue>  
        <fpage>1</fpage>  
        <lpage>11</lpage> </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Schmidt</surname>
            <given-names>NM</given-names>
          </name>
          <name name-style="western">
            <surname>Lincoln</surname>
            <given-names>AK</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>QC</given-names>
          </name>
          <name name-style="western">
            <surname>Acevedo-Garcia</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Osypuk</surname>
            <given-names>TL</given-names>
          </name>
        </person-group>
        <article-title>Examining mediators of housing mobility on adolescent asthma: results from a housing voucher experiment</article-title>
        <source>Soc Sci Med</source>  
        <year>2014</year>  
        <month>04</month>  
        <volume>107</volume>  
        <fpage>136</fpage>  
        <lpage>144</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24607675"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.socscimed.2014.02.020</pub-id>
        <pub-id pub-id-type="medline">24607675</pub-id>
        <pub-id pub-id-type="pii">S0277-9536(14)00124-5</pub-id>
        <pub-id pub-id-type="pmcid">PMC4070421</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>QC</given-names>
          </name>
          <name name-style="western">
            <surname>Rehkopf</surname>
            <given-names>DH</given-names>
          </name>
          <name name-style="western">
            <surname>Schmidt</surname>
            <given-names>NM</given-names>
          </name>
          <name name-style="western">
            <surname>Osypuk</surname>
            <given-names>TL</given-names>
          </name>
        </person-group>
        <article-title>Heterogeneous effects of housing vouchers on the mental health of US adolescents</article-title>
        <source>Am J Public Health</source>  
        <year>2016</year>  
        <month>04</month>  
        <volume>106</volume>  
        <issue>4</issue>  
        <fpage>755</fpage>  
        <lpage>762</lpage>  
        <pub-id pub-id-type="doi">10.2105/AJPH.2015.303006</pub-id>
        <pub-id pub-id-type="medline">26794179</pub-id>
        <pub-id pub-id-type="pmcid">PMC4986050</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Morland</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Wing</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Diez</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Poole</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Neighborhood characteristics associated with the location of food stores and food service places</article-title>
        <source>Am J Prev Med</source>  
        <year>2002</year>  
        <month>01</month>  
        <volume>22</volume>  
        <issue>1</issue>  
        <fpage>23</fpage>  
        <lpage>29</lpage>  
        <pub-id pub-id-type="medline">11777675</pub-id>
        <pub-id pub-id-type="pii">S0749379701004032</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Stafford</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Cummins</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ellaway</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Sacker</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Wiggins</surname>
            <given-names>RD</given-names>
          </name>
          <name name-style="western">
            <surname>Macintyre</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Pathways to obesity: identifying local, modifiable determinants of physical activity and diet</article-title>
        <source>Soc Sci Med</source>  
        <year>2007</year>  
        <month>11</month>  
        <volume>65</volume>  
        <issue>9</issue>  
        <fpage>1882</fpage>  
        <lpage>1897</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.socscimed.2007.05.042</pub-id>
        <pub-id pub-id-type="medline">17640787</pub-id>
        <pub-id pub-id-type="pii">S0277-9536(07)00304-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>MC</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>AA</given-names>
          </name>
          <name name-style="western">
            <surname>MacLeod</surname>
            <given-names>KE</given-names>
          </name>
          <name name-style="western">
            <surname>Winkleby</surname>
            <given-names>MA</given-names>
          </name>
        </person-group>
        <article-title>Socioeconomic and food-related physical characteristics of the neighbourhood environment are associated with body mass index</article-title>
        <source>J Epidemiol Community Health</source>  
        <year>2007</year>  
        <month>06</month>  
        <volume>61</volume>  
        <issue>6</issue>  
        <fpage>491</fpage>  
        <lpage>498</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17496257"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jech.2006.051680</pub-id>
        <pub-id pub-id-type="medline">17496257</pub-id>
        <pub-id pub-id-type="pii">61/6/491</pub-id>
        <pub-id pub-id-type="pmcid">PMC2465719</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Inagami</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Finch</surname>
            <given-names>BK</given-names>
          </name>
          <name name-style="western">
            <surname>Asch</surname>
            <given-names>SM</given-names>
          </name>
        </person-group>
        <article-title>You are where you shop: grocery store locations, weight, and neighborhoods</article-title>
        <source>Am J Prev Med</source>  
        <year>2006</year>  
        <month>07</month>  
        <volume>31</volume>  
        <issue>1</issue>  
        <fpage>10</fpage>  
        <lpage>17</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.amepre.2006.03.019</pub-id>
        <pub-id pub-id-type="medline">16777537</pub-id>
        <pub-id pub-id-type="pii">S0749-3797(06)00146-2</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Christiansen</surname>
            <given-names>KMH</given-names>
          </name>
          <name name-style="western">
            <surname>Qureshi</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Schaible</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Park</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Gittelsohn</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Environmental factors that impact the eating behaviors of low-income African American adolescents in Baltimore City</article-title>
        <source>J Nutr Educ Behav</source>  
        <year>2013</year>  
        <volume>45</volume>  
        <issue>6</issue>  
        <fpage>652</fpage>  
        <lpage>660</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jneb.2013.05.009</pub-id>
        <pub-id pub-id-type="medline">23916684</pub-id>
        <pub-id pub-id-type="pii">S1499-4046(13)00484-3</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Block</surname>
            <given-names>JP</given-names>
          </name>
          <name name-style="western">
            <surname>Scribner</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>DeSalvo</surname>
            <given-names>KB</given-names>
          </name>
        </person-group>
        <article-title>Fast food, race/ethnicity, and income: A geographic analysis</article-title>
        <source>Am J Prev Med</source>  
        <year>2004</year>  
        <volume>27</volume>  
        <issue>3</issue>  
        <fpage>211</fpage>  
        <lpage>217</lpage> </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Roemmich</surname>
            <given-names>JN</given-names>
          </name>
          <name name-style="western">
            <surname>Epstein</surname>
            <given-names>LH</given-names>
          </name>
          <name name-style="western">
            <surname>Raja</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Robinson</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Winiewicz</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Association of access to parks and recreational facilities with the physical activity of young children</article-title>
        <source>Prev Med</source>  
        <year>2006</year>  
        <month>12</month>  
        <volume>43</volume>  
        <issue>6</issue>  
        <fpage>437</fpage>  
        <lpage>441</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ypmed.2006.07.007</pub-id>
        <pub-id pub-id-type="medline">16928396</pub-id>
        <pub-id pub-id-type="pii">S0091-7435(06)00282-9</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Brownson</surname>
            <given-names>RC</given-names>
          </name>
          <name name-style="western">
            <surname>Hoehner</surname>
            <given-names>CM</given-names>
          </name>
          <name name-style="western">
            <surname>Day</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Forsyth</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Sallis</surname>
            <given-names>JF</given-names>
          </name>
        </person-group>
        <article-title>Measuring the built environment for physical activity: state of the science</article-title>
        <source>Am J Prev Med</source>  
        <year>2009</year>  
        <month>04</month>  
        <volume>36</volume>  
        <issue>4 Suppl</issue>  
        <fpage>S99</fpage>  
        <lpage>S123</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19285216"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.amepre.2009.01.005</pub-id>
        <pub-id pub-id-type="medline">19285216</pub-id>
        <pub-id pub-id-type="pii">S0749-3797(09)00013-0</pub-id>
        <pub-id pub-id-type="pmcid">PMC2844244</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mujahid</surname>
            <given-names>MS</given-names>
          </name>
          <name name-style="western">
            <surname>Diez Roux</surname>
            <given-names>AV</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Gowda</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Sánchez</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Shea</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Jacobs</surname>
            <given-names>DR</given-names>
          </name>
          <name name-style="western">
            <surname>Jackson</surname>
            <given-names>SA</given-names>
          </name>
        </person-group>
        <article-title>Relation between neighborhood environments and obesity in the multi-ethnic study of atherosclerosis</article-title>
        <source>Am J Epidemiol</source>  
        <year>2008</year>  
        <month>06</month>  
        <day>1</day>  
        <volume>167</volume>  
        <issue>11</issue>  
        <fpage>1349</fpage>  
        <lpage>1357</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://aje.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=18367469"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1093/aje/kwn047</pub-id>
        <pub-id pub-id-type="medline">18367469</pub-id>
        <pub-id pub-id-type="pii">kwn047</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yen</surname>
            <given-names>IH</given-names>
          </name>
          <name name-style="western">
            <surname>Kaplan</surname>
            <given-names>GA</given-names>
          </name>
        </person-group>
        <article-title>Poverty area residence and changes in physical activity level: evidence from the Alameda County Study</article-title>
        <source>Am J Public Health</source>  
        <year>1998</year>  
        <volume>88</volume>  
        <issue>11</issue>  
        <fpage>1709</fpage>  
        <lpage>1712</lpage> </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ross</surname>
            <given-names>CE</given-names>
          </name>
        </person-group>
        <article-title>Walking, exercising, and smoking: does neighborhood matter?</article-title>
        <source>Soc Sci Med</source>  
        <year>2000</year>  
        <month>07</month>  
        <volume>51</volume>  
        <issue>2</issue>  
        <fpage>265</fpage>  
        <lpage>274</lpage>  
        <pub-id pub-id-type="medline">10832573</pub-id>
        <pub-id pub-id-type="pii">S0277953699004517</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Morland</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Wing</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Diez Roux</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>The contextual effect of the local food environment on residents' diets: the atherosclerosis risk in communities study</article-title>
        <source>Am J Public Health</source>  
        <year>2002</year>  
        <month>11</month>  
        <volume>92</volume>  
        <issue>11</issue>  
        <fpage>1761</fpage>  
        <lpage>1767</lpage>  
        <pub-id pub-id-type="medline">12406805</pub-id>
        <pub-id pub-id-type="pmcid">PMC1447325</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Black</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>Macinko</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Dixon</surname>
            <given-names>LB</given-names>
          </name>
          <name name-style="western">
            <surname>Fryer</surname>
            <given-names>GE</given-names>
          </name>
        </person-group>
        <article-title>Neighborhoods and obesity in New York City</article-title>
        <source>Health Place</source>  
        <year>2010</year>  
        <month>05</month>  
        <volume>16</volume>  
        <issue>3</issue>  
        <fpage>489</fpage>  
        <lpage>499</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.healthplace.2009.12.007</pub-id>
        <pub-id pub-id-type="medline">20106710</pub-id>
        <pub-id pub-id-type="pii">S1353-8292(09)00151-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Diez-Roux</surname>
            <given-names>AV</given-names>
          </name>
        </person-group>
        <article-title>Bringing context back into epidemiology: variables and fallacies in multilevel analysis</article-title>
        <source>Am J Public Health</source>  
        <year>1998</year>  
        <month>02</month>  
        <volume>88</volume>  
        <issue>2</issue>  
        <fpage>216</fpage>  
        <lpage>222</lpage>  
        <pub-id pub-id-type="medline">9491010</pub-id>
        <pub-id pub-id-type="pmcid">PMC1508189</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Macintyre</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>MacIver</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sooman</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Area, class and health: Should we be focusing on places or people?</article-title>
        <source>J Soc Policy</source>  
        <year>1993</year>  
        <volume>22</volume>  
        <fpage>213</fpage>  
        <lpage>233</lpage> </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Duncan</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Jones</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Moon</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Context, composition and heterogeneity: using multilevel models in health research</article-title>
        <source>Soc Sci Med</source>  
        <year>1998</year>  
        <volume>46</volume>  
        <fpage>97</fpage>  
        <lpage>117</lpage> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pearlin</surname>
            <given-names>LI</given-names>
          </name>
        </person-group>
        <article-title>The sociological study of stress</article-title>
        <source>J Health Soc Behav</source>  
        <year>1989</year>  
        <volume>30</volume>  
        <issue>3</issue>  
        <fpage>241</fpage>  
        <lpage>256</lpage> </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Johns</surname>
            <given-names>LE</given-names>
          </name>
        </person-group>
        <article-title>Neighborhood social cohesion and posttraumatic stress disorder in a community-based sample: findings from the Detroit Neighborhood Health Study</article-title>
        <source>Soc Psych Psych Epid</source>  
        <year>2012</year>  
        <volume>47</volume>  
        <issue>12</issue>  
        <fpage>1899</fpage>  
        <lpage>1906</lpage> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Oswald</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Powdthavee</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Obesity, unhappiness, and the challenge of affluence: theory and evidence</article-title>
        <source>Econ J</source>  
        <year>2007</year>  
        <volume>117</volume>  
        <fpage>117</fpage> </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bray</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Gunnell</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Suicide rates, life satisfaction and happiness as markers for population mental health</article-title>
        <source>Soc Psych Psych Epid</source>  
        <year>2006</year>  
        <volume>41</volume>  
        <issue>5</issue>  
        <fpage>333</fpage>  
        <lpage>337</lpage> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Di Tella</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>MacCulloch</surname>
            <given-names>RJ</given-names>
          </name>
          <name name-style="western">
            <surname>Oswald</surname>
            <given-names>AJ</given-names>
          </name>
        </person-group>
        <article-title>The macroeconomics of happiness</article-title>
        <source>Rev Econ Stat</source>  
        <year>2003</year>  
        <volume>85</volume>  
        <issue>4</issue>  
        <fpage>809</fpage>  
        <lpage>827</lpage> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Blanchflower</surname>
            <given-names>DG</given-names>
          </name>
          <name name-style="western">
            <surname>Oswald</surname>
            <given-names>AJ</given-names>
          </name>
        </person-group>
        <article-title>Hypertension and happiness across nations</article-title>
        <source>J Health Econ</source>  
        <year>2008</year>  
        <volume>27</volume>  
        <issue>2</issue>  
        <fpage>218</fpage>  
        <lpage>233</lpage> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dodds</surname>
            <given-names>PS</given-names>
          </name>
        </person-group>
        <article-title>Temporal patterns of happiness and information in a global social network: hedonometrics and Twitter</article-title>
        <source>PLoS One</source>  
        <year>2011</year>  
        <volume>6</volume>  
        <issue>12</issue>  
        <fpage>e26752</fpage> </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Di Tella</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>MacCulloch</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Gross national happiness as an answer to the Easterlin Paradox?</article-title>
        <source>J Devel Econ</source>  
        <year>2008</year>  
        <volume>86</volume>  
        <issue>1</issue>  
        <fpage>22</fpage>  
        <lpage>42</lpage> </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bearman</surname>
            <given-names>PS</given-names>
          </name>
          <name name-style="western">
            <surname>Moody</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Suicide and friendships among American adolescents</article-title>
        <source>Am J Public Health</source>  
        <year>2004</year>  
        <volume>94</volume>  
        <issue>1</issue>  
        <fpage>89</fpage>  
        <lpage>95</lpage> </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Larson</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Richards</surname>
            <given-names>MH</given-names>
          </name>
        </person-group>
        <source>Divergent Realities: The Emotional Lives of Mothers, Fathers, and Adolescents</source>  
        <year>1994</year>  
        <publisher-loc>New York, NY</publisher-loc>
        <publisher-name>Basic Books</publisher-name></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fowler</surname>
            <given-names>JH</given-names>
          </name>
          <name name-style="western">
            <surname>Christakis</surname>
            <given-names>NA</given-names>
          </name>
        </person-group>
        <article-title>Dynamic spread of happiness in a large social network: Longitudinal analysis over 20 years in the Framingham heart study</article-title>
        <source>Brit Med J</source>  
        <year>2008</year>  
        <volume>337</volume>  
        <fpage>a2338</fpage> </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Guan</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Kamo</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Contextualizing depressive contagion: A multilevel network approach</article-title>
        <source>Soc Ment Health</source>  
        <year>2015</year>  
        <month>12</month>  
        <day>09</day>  
        <pub-id pub-id-type="doi">10.1177/2156869315619657</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pachucki</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Jacques</surname>
            <given-names>PF</given-names>
          </name>
          <name name-style="western">
            <surname>Christakis</surname>
            <given-names>NA</given-names>
          </name>
        </person-group>
        <article-title>Social network concordance in food choice among spouses, friends, and siblings</article-title>
        <source>Am J Public Health</source>  
        <year>2011</year>  
        <month>11</month>  
        <volume>101</volume>  
        <issue>11</issue>  
        <fpage>2170</fpage>  
        <lpage>2177</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21940920"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2105/AJPH.2011.300282</pub-id>
        <pub-id pub-id-type="medline">21940920</pub-id>
        <pub-id pub-id-type="pii">AJPH.2011.300282</pub-id>
        <pub-id pub-id-type="pmcid">PMC3222397</pub-id></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Keating</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>O'Malley</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Murabito</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>KP</given-names>
          </name>
          <name name-style="western">
            <surname>Christakis</surname>
            <given-names>NA</given-names>
          </name>
        </person-group>
        <article-title>Minimal social network effects evident in cancer screening behavior</article-title>
        <source>Cancer</source>  
        <year>2011</year>  
        <month>07</month>  
        <day>1</day>  
        <volume>117</volume>  
        <issue>13</issue>  
        <fpage>3045</fpage>  
        <lpage>3052</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.doi.org/10.1002/cncr.25849"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1002/cncr.25849</pub-id>
        <pub-id pub-id-type="medline">21264828</pub-id>
        <pub-id pub-id-type="pmcid">PMC3119780</pub-id></nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rosenquist</surname>
            <given-names>JN</given-names>
          </name>
          <name name-style="western">
            <surname>Murabito</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Fowler</surname>
            <given-names>JH</given-names>
          </name>
          <name name-style="western">
            <surname>Christakis</surname>
            <given-names>NA</given-names>
          </name>
        </person-group>
        <article-title>The spread of alcohol consumption behavior in a large social network</article-title>
        <source>Ann Intern Med</source>  
        <year>2010</year>  
        <month>04</month>  
        <day>6</day>  
        <volume>152</volume>  
        <issue>7</issue>  
        <fpage>426</fpage>  
        <lpage>433</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20368648"/>
        </comment>  
        <pub-id pub-id-type="doi">10.7326/0003-4819-152-7-201004060-00007</pub-id>
        <pub-id pub-id-type="medline">20368648</pub-id>
        <pub-id pub-id-type="pii">152/7/426</pub-id>
        <pub-id pub-id-type="pmcid">PMC3343772</pub-id></nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mednick</surname>
            <given-names>SC</given-names>
          </name>
          <name name-style="western">
            <surname>Christakis</surname>
            <given-names>NA</given-names>
          </name>
          <name name-style="western">
            <surname>Fowler</surname>
            <given-names>JH</given-names>
          </name>
        </person-group>
        <article-title>The spread of sleep loss influences drug use in adolescent social networks</article-title>
        <source>PLoS One</source>  
        <year>2010</year>  
        <volume>5</volume>  
        <issue>3</issue>  
        <fpage>e9775</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0009775"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0009775</pub-id>
        <pub-id pub-id-type="medline">20333306</pub-id>
        <pub-id pub-id-type="pmcid">PMC2841645</pub-id></nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
        <source>National Archive of Criminal Justice</source>  
        <year>2012</year>  
        <access-date>2016-09-28</access-date>
        <comment>Project on Human Development in Chicago Neighborhoods 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.icpsr.umich.edu/icpsrweb/PHDCN/">http://www.icpsr.umich.edu/icpsrweb/PHDCN/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6ks3vmrzJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
        <source>Baltimore Neighborhood Indicators Alliance: Vital Signs 11</source>  
        <year>2013</year>  
        <month>09</month>  
        <day>24</day>  
        <access-date>2016-09-28</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bniajfi.org/wp-content/uploads/2014/04/VS-11-Intro.pdf">http://bniajfi.org/wp-content/uploads/2014/04/VS-11-Intro.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ks49HpP9"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Peterson</surname>
            <given-names>RD</given-names>
          </name>
          <name name-style="western">
            <surname>Krivo</surname>
            <given-names>LJ</given-names>
          </name>
        </person-group>
        <source>National Neighborhood Crime Study</source>  
        <year>2000</year>  
        <access-date>2016-09-28</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.icpsr.umich.edu/icpsrweb/RCMD/studies/27501">http://www.icpsr.umich.edu/icpsrweb/RCMD/studies/27501</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ks4ICpcz"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Eysenbach</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Infodemiology: tracking flu-related searches on the web for syndromic surveillance</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2006</year>  
        <fpage>244</fpage>  
        <lpage>248</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17238340"/>
        </comment>  
        <pub-id pub-id-type="medline">17238340</pub-id>
        <pub-id pub-id-type="pii">86095</pub-id>
        <pub-id pub-id-type="pmcid">PMC1839505</pub-id></nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Eysenbach</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Infodemiology and infoveillance</article-title>
        <source>Am J Prev Med</source>  
        <year>2011</year>  
        <volume>40</volume>  
        <issue>5</issue>  
        <fpage>S154</fpage>  
        <lpage>S158</lpage> </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yepes</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Han</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Investigating public health surveillance using Twitter</article-title>
        <source>ACL-IJCNLP</source>  
        <year>2015</year>  
        <volume>2015</volume>  
        <fpage>164</fpage> </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nsoesie</surname>
            <given-names>EO</given-names>
          </name>
          <name name-style="western">
            <surname>Kluberg</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Brownstein</surname>
            <given-names>JS</given-names>
          </name>
        </person-group>
        <article-title>Online reports of foodborne illness capture foods implicated in official foodborne outbreak reports</article-title>
        <source>Prev Med</source>  
        <year>2014</year>  
        <month>10</month>  
        <volume>67</volume>  
        <fpage>264</fpage>  
        <lpage>269</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S0091-7435(14)00293-X"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.ypmed.2014.08.003</pub-id>
        <pub-id pub-id-type="medline">25124281</pub-id>
        <pub-id pub-id-type="pii">S0091-7435(14)00293-X</pub-id>
        <pub-id pub-id-type="pmcid">PMC4167574</pub-id></nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Woo</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Cho</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Shim</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>SH</given-names>
          </name>
        </person-group>
        <article-title>Estimating influenza outbreaks using both search engine query data and social media data in South Korea</article-title>
        <source>J Med Internet Res</source>  
        <year>2016</year>  
        <volume>18</volume>  
        <issue>7</issue>  
        <fpage>e177</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2016/7/e177/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4955</pub-id>
        <pub-id pub-id-type="medline">27377323</pub-id>
        <pub-id pub-id-type="pii">v18i7e177</pub-id>
        <pub-id pub-id-type="pmcid">PMC4949385</pub-id></nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>McIver</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Hawkins</surname>
            <given-names>JB</given-names>
          </name>
          <name name-style="western">
            <surname>Chunara</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Chatterjee</surname>
            <given-names>AK</given-names>
          </name>
          <name name-style="western">
            <surname>Bhandari</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Fitzgerald</surname>
            <given-names>TP</given-names>
          </name>
          <name name-style="western">
            <surname>Jain</surname>
            <given-names>SH</given-names>
          </name>
          <name name-style="western">
            <surname>Brownstein</surname>
            <given-names>JS</given-names>
          </name>
        </person-group>
        <article-title>Characterizing sleep issues using Twitter</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <volume>17</volume>  
        <issue>6</issue>  
        <fpage>e140</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2015/6/e140/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4476</pub-id>
        <pub-id pub-id-type="medline">26054530</pub-id>
        <pub-id pub-id-type="pii">v17i6e140</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526927</pub-id></nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>QC</given-names>
          </name>
        </person-group>
        <article-title>Leveraging geotagged Twitter data to examine neighborhood happiness, diet, and physical activity</article-title>
        <source>Appl Geogr</source>  
        <year>2016</year>  
        <volume>73</volume>  
        <fpage>77</fpage>  
        <lpage>88</lpage> </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Fabbri</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Rosenbloom</surname>
            <given-names>ST</given-names>
          </name>
          <name name-style="western">
            <surname>Malin</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>A scalable framework to detect personal health mentions on Twitter</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <volume>17</volume>  
        <issue>6</issue>  
        <fpage>e138</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2015/6/e138/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4305</pub-id>
        <pub-id pub-id-type="medline">26048075</pub-id>
        <pub-id pub-id-type="pii">v17i6e138</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526910</pub-id></nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hawkins</surname>
            <given-names>JB</given-names>
          </name>
        </person-group>
        <article-title>Measuring patient-perceived quality of care in US hospitals using Twitter</article-title>
        <source>BMJ Qual Safety</source>  
        <year>2015</year>  
        <fpage>4309</fpage> </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Etherington</surname>
            <given-names>TR</given-names>
          </name>
        </person-group>
        <article-title>Teaching introductory GIS programming to geographers using an open source Python approach</article-title>
        <source>J Geogr Higher Educ</source>  
        <year>2016</year>  
        <volume>40</volume>  
        <issue>1</issue>  
        <fpage>117</fpage>  
        <lpage>130</lpage> </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Guttman</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>R-trees: a dynamic index structure for spatial searching</article-title>
        <year>1984</year>  
        <conf-name>1984 ACM SIGMOD international conference on Management of Data</conf-name>
        <conf-date>1984</conf-date>
        <conf-loc>New York, NY</conf-loc>
        <fpage>47</fpage>  
        <lpage>57</lpage> </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>QC</given-names>
          </name>
          <name name-style="western">
            <surname>Schmidt</surname>
            <given-names>NM</given-names>
          </name>
          <name name-style="western">
            <surname>Glymour</surname>
            <given-names>MM</given-names>
          </name>
          <name name-style="western">
            <surname>Rehkopf</surname>
            <given-names>DH</given-names>
          </name>
          <name name-style="western">
            <surname>Osypuk</surname>
            <given-names>TL</given-names>
          </name>
        </person-group>
        <article-title>Were the mental health benefits of a housing mobility intervention larger for adolescents in higher socioeconomic status families?</article-title>
        <source>Health Place</source>  
        <year>2013</year>  
        <month>09</month>  
        <volume>23</volume>  
        <fpage>79</fpage>  
        <lpage>88</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23792412"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.healthplace.2013.05.002</pub-id>
        <pub-id pub-id-type="medline">23792412</pub-id>
        <pub-id pub-id-type="pii">S1353-8292(13)00071-3</pub-id>
        <pub-id pub-id-type="pmcid">PMC3757111</pub-id></nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Larson</surname>
            <given-names>NI</given-names>
          </name>
          <name name-style="western">
            <surname>Story</surname>
            <given-names>MT</given-names>
          </name>
          <name name-style="western">
            <surname>Nelson</surname>
            <given-names>MC</given-names>
          </name>
        </person-group>
        <article-title>Neighborhood environments: disparities in access to healthy foods in the US</article-title>
        <source>Am J Prev Med</source>  
        <year>2009</year>  
        <month>01</month>  
        <volume>36</volume>  
        <issue>1</issue>  
        <fpage>74</fpage>  
        <lpage>81</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.amepre.2008.09.025</pub-id>
        <pub-id pub-id-type="medline">18977112</pub-id>
        <pub-id pub-id-type="pii">S0749-3797(08)00838-6</pub-id></nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Roth</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Integrating population- and patient-level data for secondary use of electronic health records to study overweight and obesity</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2013</year>  
        <fpage>192</fpage> </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>Stanford Natural Language Processing Group</collab>
        </person-group>
        <source>Stanford tokenizer</source>  
        <year>2015</year>  
        <access-date>2016-09-28</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://nlp.stanford.edu/software/tokenizer.shtml">http://nlp.stanford.edu/software/tokenizer.shtml</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ks509JgN"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nigam</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Lafferty</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>McCallum</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Using maximum entropy for text classification</article-title>
        <year>1999</year>  
        <conf-name>IJCAI-99 workshop on machine learning for information filtering</conf-name>
        <conf-date>1999</conf-date>
        <conf-loc>Stockholm, Sweden</conf-loc></nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
        <source>Sentiment140 for Academics</source>  
        <access-date>2016-08-16</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://sites.google.com/site/twittersentimenthelp/for-researchers">https://sites.google.com/site/twittersentimenthelp/for-researchers</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6joQzyTSS"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
        <source>Twitter sentiment corpus</source>  
        <year>2011</year>  
        <access-date>2016-08-16</access-date>
        <publisher-name>Sanders Analytics</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.sananalytics.com/lab/twitter-sentiment/">http://www.sananalytics.com/lab/twitter-sentiment/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6joVh9V4R"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
        <source>Kaggle in Class</source>  
        <year>2011</year>  
        <comment>Sentiment Classification 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://inclass.kaggle.com/c/si650winter11">https://inclass.kaggle.com/c/si650winter11</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6joWjx5fX"/></comment> </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
        <source>National Nutrient Database</source>  
        <year>2014</year>  
        <month>02</month>  
        <day>5</day>  
        <access-date>2016-09-28</access-date>
        <publisher-loc>Washington, DC</publisher-loc>
        <publisher-name>United States Department of Agriculture</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://ndb.nal.usda.gov/ndb/search/list?format=&#38;count=&#38;max=25&#38;sort=&#38;fg=&#38;man=&#38;lfacet=&#38;qlookup=&#38;offset=50">http://ndb.nal.usda.gov/ndb/search/list?format=&#38;count=&#38;max=25&#38;sort=&#38;fg=&#38;man=&#38;lfacet=&#38;qlookup=&#38;offset=50</ext-link>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ainsworth</surname>
            <given-names>BE</given-names>
          </name>
        </person-group>
        <article-title>2011 compendium of physical activities: a second update of codes and MET values</article-title>
        <source>Med Sci Sport Exer</source>  
        <year>2011</year>  
        <volume>43</volume>  
        <issue>8</issue>  
        <fpage>1575</fpage>  
        <lpage>1581</lpage> </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Electronic word of mouth on Twitter about physical activity in the United States: exploratory infodemiology study</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>11</issue> </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kendall</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Descriptive analysis of physical activity conversations on Twitter</article-title>
        <year>2011</year>  
        <conf-name>CHI '11 Extended Abstracts on Human Factors in Computing Systems</conf-name>
        <conf-date>2011</conf-date>
        <conf-loc>Vancouver, Canada</conf-loc>
        <fpage>1555</fpage>  
        <lpage>1560</lpage> </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="web">
        <source>Body Measurements</source>  
        <year>2012</year>  
        <month>09</month>  
        <day>2</day>  
        <access-date>2016-09-28</access-date>
        <publisher-name>National Center for Health Statistics, Centers for Disease Control and Prevention</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cdc.gov/nchs/fastats/body-measurements.htm">http://www.cdc.gov/nchs/fastats/body-measurements.htm</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ks62JHO7"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>Harvard Health Publications</collab>
        </person-group>
        <source>Calories burned in 30 minutes for people of three different weights</source>  
        <year>2015</year>  
        <access-date>2016-09-28</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.health.harvard.edu/newsweek/Calories-burned-in-30-minutes-of-leisure-and-routine-activities.htm">http://www.health.harvard.edu/newsweek/Calories-burned-in-30-minutes-of-leisure-and-routine-activities.htm</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ks6AbOhg"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Snow</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Cheap and fast--but is it good? Evaluating non-expert annotations for natural language tasks</article-title>
        <year>2008</year>  
        <conf-name>Proceedings of the Conference on Empirical Methods in Natural Language Processing</conf-name>
        <conf-date>2008</conf-date>
        <conf-loc>Stroudsburg, PA</conf-loc>
        <fpage>254</fpage>  
        <lpage>263</lpage> </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mitchell</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Frank</surname>
            <given-names>MR</given-names>
          </name>
          <name name-style="western">
            <surname>Harris</surname>
            <given-names>KD</given-names>
          </name>
          <name name-style="western">
            <surname>Dodds</surname>
            <given-names>PS</given-names>
          </name>
          <name name-style="western">
            <surname>Danforth</surname>
            <given-names>CM</given-names>
          </name>
        </person-group>
        <article-title>The geography of happiness: connecting Twitter sentiment and expression, demographics, and objective characteristics of place</article-title>
        <source>PLoS One</source>  
        <year>2013</year>  
        <volume>8</volume>  
        <issue>5</issue>  
        <fpage>e64417</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0064417"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0064417</pub-id>
        <pub-id pub-id-type="medline">23734200</pub-id>
        <pub-id pub-id-type="pii">PONE-D-13-07723</pub-id>
        <pub-id pub-id-type="pmcid">PMC3667195</pub-id></nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="web">
        <source>Sentiment140 general information</source>  
        <access-date>2016-09-29</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://help.sentiment140.com/">http://help.sentiment140.com/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6kt10i1Ki"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="web">
        <source>ZIP Code Business Patterns</source>  
        <year>2015</year>  
        <access-date>2016-09-29</access-date>
        <publisher-loc>Washington, DC</publisher-loc>
        <publisher-name>US Census Bureau</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.census.gov/newsroom/press-releases/2015/cb15-tps39.html">http://www.census.gov/newsroom/press-releases/2015/cb15-tps39.html</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6kt1J9t9p"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kuczmarski</surname>
            <given-names>MF</given-names>
          </name>
          <name name-style="western">
            <surname>Kuczmarski</surname>
            <given-names>RJ</given-names>
          </name>
          <name name-style="western">
            <surname>Najjar</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Effects of age on validity of self-reported height, weight, and body mass index: findings from the Third National Health and Nutrition Examination Survey, 1988-1994</article-title>
        <source>J Am Diet Assoc</source>  
        <year>2001</year>  
        <month>01</month>  
        <volume>101</volume>  
        <issue>1</issue>  
        <fpage>28</fpage>  
        <lpage>34</lpage>  
        <pub-id pub-id-type="doi">10.1016/S0002-8223(01)00008-6</pub-id>
        <pub-id pub-id-type="medline">11209581</pub-id>
        <pub-id pub-id-type="pii">S0002-8223(01)00008-6</pub-id></nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="web">
        <source>Behavioral Risk Factor Surveillance System Survey Data</source>  
        <year>2013</year>  
        <access-date>2016-09-29</access-date>
        <publisher-loc>Atlanta, GA</publisher-loc>
        <publisher-name>Centers for Disease Control and Prevention</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cdc.gov/brfss/">http://www.cdc.gov/brfss/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6kt2xfrVk"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="web">
        <source>Utah Behavioral Risk Factor Surveillance System Survey Data</source>  
        <year>2014</year>  
        <access-date>2016-10-06</access-date>
        <publisher-name>Office of Public Health Assessment, Utah Department of Health</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://health.utah.gov/opha/OPHA_BRFSS.htm">http://health.utah.gov/opha/OPHA_BRFSS.htm</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6l4AUrogD"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="web">
        <source>Geographic terms and concepts: census tract</source>  
        <year>2012</year>  
        <month>01</month>  
        <day>06</day>  
        <access-date>2016-10-02</access-date>
        <publisher-name>US Census Bureau</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.census.gov/geo/reference/gtc/gtc_ct.html">https://www.census.gov/geo/reference/gtc/gtc_ct.html</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6kxgOkozH"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Myslín</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Conway</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Using Twitter to examine smoking behavior and perceptions of emerging tobacco products</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>8</issue>  
        <fpage>e174</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2013/8/e174/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2534</pub-id>
        <pub-id pub-id-type="medline">23989137</pub-id>
        <pub-id pub-id-type="pii">v15i8e174</pub-id>
        <pub-id pub-id-type="pmcid">PMC3758063</pub-id></nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Paul</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Dredze</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>You are what you tweet: analyzing Twitter for public health</article-title>
        <year>2011</year>  
        <month>07</month>  
        <day>05</day>  
        <conf-name>Proceedings of the Fifth International AAAI Conference on Weblogs and Social Media</conf-name>
        <conf-date>2011</conf-date>
        <conf-loc>Barcelona, Spain</conf-loc></nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>De Choudhury</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Sharma</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Kiciman</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Characterizing dietary choices, nutrition, and language in food deserts via social media</article-title>
        <year>2016</year>  
        <conf-name>Proceedings of the 19th ACM Conference on Computer-Supported Cooperative Work &#38; Social Computing</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>San Francisco, CA</conf-loc>
        <fpage>1157</fpage>  
        <lpage>1170</lpage> </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>McEwen</surname>
            <given-names>BS</given-names>
          </name>
        </person-group>
        <article-title>Stress, adaptation, and disease: allostasis and allostatic load</article-title>
        <source>Ann NY Academy Sci</source>  
        <year>1998</year>  
        <volume>840</volume>  
        <issue>1</issue>  
        <fpage>33</fpage>  
        <lpage>44</lpage> </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Seeman</surname>
            <given-names>TE</given-names>
          </name>
        </person-group>
        <article-title>Price of adaptation: allostatic load and its health consequences</article-title>
        <source>Arch Intern Med</source>  
        <year>1997</year>  
        <volume>157</volume>  
        <issue>19</issue>  
        <fpage>2259</fpage>  
        <lpage>2268</lpage> </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Duggan</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>Social Media Update 2014</source>  
        <year>2015</year>  
        <access-date>2016-09-29</access-date>
        <publisher-loc>Washington, DC</publisher-loc>
        <publisher-name>Pew Internet and American Life Project</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.pewinternet.org/files/2015/01/PI_SocialMediaUpdate20144.pdf">http://www.pewinternet.org/files/2015/01/PI_SocialMediaUpdate20144.pdf</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6kt2HVzYR"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="web">
        <source>Difference between sample and filter streaming API</source>  
        <year>2016</year>  
        <month>08</month>  
        <day>06</day>  
        <access-date>2016-09-29</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://twittercommunity.com/t/diffence-between-sample-and-filter-streaming-api/15094">https://twittercommunity.com/t/diffence-between-sample-and-filter-streaming-api/15094</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6kt2LR5ve"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Burton</surname>
            <given-names>SH</given-names>
          </name>
        </person-group>
        <article-title>Right time, right place? Health communication on Twitter: value and accuracy of location information</article-title>
        <source>J Med Internet Res</source>  
        <year>2012</year>  
        <volume>14</volume>  
        <issue>6</issue> </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Morstatter</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Is the Sample Good Enough? Comparing Data from Twitter's Streaming API with Twitter's Firehose</article-title>
        <source>arXiv:1306.5204v1 [cs.SI]</source>  
        <year>2013</year> </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Burfoot</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Baldwin</surname>
            <given-names>TA</given-names>
          </name>
        </person-group>
        <article-title>Automatic satire detection: Are you having a laugh?</article-title>
        <year>2009</year>  
        <conf-name>Proceedings of the Association for Computational Linguistics-IJCNLP</conf-name>
        <conf-date>2009</conf-date>
        <conf-loc>Singapore</conf-loc></nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ptáček</surname>
            <given-names>Tomáš</given-names>
          </name>
          <name name-style="western">
            <surname>Habernal</surname>
            <given-names>Ivan</given-names>
          </name>
          <name name-style="western">
            <surname>Hong</surname>
            <given-names>Jun</given-names>
          </name>
        </person-group>
        <article-title>Sarcasm Detection on Czech and English Twitter</article-title>
        <source>COLING</source>  
        <year>2014</year>  
        <conf-name>25th International Conference on Computational Linguistics</conf-name>
        <conf-date>August 23-29 2014</conf-date>
        <conf-loc>Dublin, Ireland</conf-loc>
        <fpage>213</fpage>  
        <lpage>223</lpage> </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ali</surname>
            <given-names>MM</given-names>
          </name>
          <name name-style="western">
            <surname>Amialchuk</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Heiland</surname>
            <given-names>FW</given-names>
          </name>
        </person-group>
        <article-title>Weight-related behavior among adolescents: the role of peer effects</article-title>
        <source>PLoS One</source>  
        <year>2011</year>  
        <volume>6</volume>  
        <issue>6</issue>  
        <fpage>e21179</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0021179"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0021179</pub-id>
        <pub-id pub-id-type="medline">21731665</pub-id>
        <pub-id pub-id-type="pii">PONE-D-11-04128</pub-id>
        <pub-id pub-id-type="pmcid">PMC3121719</pub-id></nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Vartanian</surname>
            <given-names>LR</given-names>
          </name>
          <name name-style="western">
            <surname>Sokol</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Herman</surname>
            <given-names>CP</given-names>
          </name>
          <name name-style="western">
            <surname>Polivy</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Social models provide a norm of appropriate food intake for young women</article-title>
        <source>PLoS One</source>  
        <year>2013</year>  
        <volume>8</volume>  
        <issue>11</issue>  
        <fpage>e79268</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0079268"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0079268</pub-id>
        <pub-id pub-id-type="medline">24236117</pub-id>
        <pub-id pub-id-type="pii">PONE-D-13-24309</pub-id>
        <pub-id pub-id-type="pmcid">PMC3827378</pub-id></nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Finch</surname>
            <given-names>BK</given-names>
          </name>
          <name name-style="western">
            <surname>Bower</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Sastry</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Collective efficacy and obesity: the potential influence of social factors on health</article-title>
        <source>Soc Sci Med</source>  
        <year>2006</year>  
        <month>02</month>  
        <volume>62</volume>  
        <issue>3</issue>  
        <fpage>769</fpage>  
        <lpage>778</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.socscimed.2005.06.033</pub-id>
        <pub-id pub-id-type="medline">16039767</pub-id>
        <pub-id pub-id-type="pii">S0277-9536(05)00319-9</pub-id></nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>US state- and county-level social capital in relation to obesity and physical inactivity: A multilevel, multivariable analysis</article-title>
        <source>Soc Sci Med</source>  
        <year>2006</year>  
        <volume>63</volume>  
        <issue>4</issue>  
        <fpage>1045</fpage>  
        <lpage>1059</lpage> </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Berkman</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Syme</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Social networks, host resistance, and mortality: A nine-year follow-up study of Alameda County residents</article-title>
        <source>Am J Epidemiol</source>  
        <year>1979</year>  
        <volume>109</volume>  
        <issue>2</issue>  
        <fpage>186</fpage>  
        <lpage>204</lpage> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
