% Journal article record for "Text Algorithms in Economics" (Annual Review of Economics, vol. 15, 2023).
% NOTE(review): only one author is recorded, but the abstract is written in the
%   first-person plural ("we introduce", "we define", "we flag"), so the author list
%   may be incomplete -- verify against the publisher's record before citing.
% NOTE(review): url is the journal's landing page, not the article itself; add a
%   bare doi field (no resolver prefix) once it has been confirmed.
@article{discovery10171178,
  author    = {Hansen, Stephen},
  title     = {Text Algorithms in Economics},
  journal   = {Annual Review of Economics},
  year      = {2023},
  month     = sep,
  volume    = {15},
  pages     = {659--688},
  publisher = {Annual Reviews},
  url       = {https://www.annualreviews.org/journal/economics},
  keywords  = {text as data, topic models, word embeddings, large language models, transformer models, JEL C18, JEL C45, JEL C55},
  note      = {{\copyright} 2023 by the Author(s). This work is licensed under a Creative Commons Attribution 4.0 International License (http://creativecommons.org/licenses/by/4.0/).},
  abstract  = {This article provides an overview of the methods used for algorithmic text analysis in economics, with a focus on three key contributions. First, we introduce methods for representing documents as high-dimensional count vectors over vocabulary terms, for representing words as vectors, and for representing word sequences as embedding vectors. Second, we define four core empirical tasks that encompass most text-as-data research in economics and enumerate the various approaches that have been taken so far to accomplish these tasks. Finally, we flag limitations in the current literature, with a focus on the challenge of validating algorithmic output.},
}