% Journal article: Lampos, Yom-Tov, Pebody and Cox (2015), Data Mining and Knowledge Discovery 29(5).
% Cleaned from a repository export: month uses the standard macro, the bare DOI is stored in
% its own field (url kept for styles without doi support), author initials are separated,
% and the proper noun in the title is brace-protected against sentence-casing styles.
@article{discovery1469803,
          author = {Lampos, V. and Yom-Tov, E. and Pebody, R. and Cox, I. J.},
           title = {Assessing the impact of a health intervention via user-generated {Internet} content},
         journal = {Data Mining and Knowledge Discovery},
       publisher = {Kluwer Academic Publishers},
          volume = {29},
          number = {5},
           pages = {1434--1457},
            year = {2015},
           month = jul,
            issn = {1384-5810},
             doi = {10.1007/s10618-015-0427-9},
             url = {http://dx.doi.org/10.1007/s10618-015-0427-9},
            note = {{\copyright} The Author(s) 2015. This article is distributed under the terms of the Creative Commons Attribution 4.0 International License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made.},
        abstract = {Assessing the effect of a health-oriented intervention by traditional epidemiological methods is commonly based only on population segments that use healthcare services. Here we introduce a complementary framework for evaluating the impact of a targeted intervention, such as a vaccination campaign against an infectious disease, through a statistical analysis of user-generated content submitted on web platforms. Using supervised learning, we derive a nonlinear regression model for estimating the prevalence of a health event in a population from Internet data. This model is applied to identify control location groups that correlate historically with the areas, where a specific intervention campaign has taken place. We then determine the impact of the intervention by inferring a projection of the disease rates that could have emerged in the absence of a campaign. Our case study focuses on the influenza vaccination program that was launched in England during the 2013/14 season, and our observations consist of millions of geo-located search queries to the Bing search engine and posts on Twitter. The impact estimates derived from the application of the proposed statistical framework support conventional assessments of the campaign.},
        keywords = {Gaussian Process, Infectious diseases, Intervention, Search query logs, Social media, Supervised learning, User-generated content}
}