% Conference paper published in Springer's "Lecture Notes in Computer Science"
% series, volume 8645 (the source record labelled it "PART 2" of that volume).
% The identifier is stored as a bare DOI; the url field keeps the resolver link
% for styles that print only url.
% NOTE(review): the month value and the split of the third author's initials
% ("MD" -> "M. D.") come from a repository export -- confirm against the
% publisher record.
@inproceedings{discovery10065716,
  author    = {Tissot, H. and Peschl, G. and Del Fabro, M. D.},
  title     = {Fast phonetic similarity search over large repositories},
  booktitle = {Database and Expert Systems Applications},
  series    = {Lecture Notes in Computer Science},
  volume    = {8645},
  part      = {2},
  pages     = {74--81},
  year      = {2014},
  month     = jan,
  publisher = {Springer},
  address   = {Cham, Switzerland},
  doi       = {10.1007/978-3-319-10085-2_6},
  url       = {https://doi.org/10.1007/978-3-319-10085-2_6},
  issn      = {1611-3349},
  keywords  = {Phonetic Similarity, String Similarity, Fast Search},
  abstract  = {Analysis of unstructured data may be inefficient in the presence of spelling errors. Existing approaches use string similarity methods to search for valid words within a text, with a supporting dictionary. However, they are not rich enough to encode phonetic information to assist the search. In this paper, we present a novel approach for efficiently perform phonetic similarity search over large data sources, that uses a data structure called PhoneticMap to encode language-specific phonetic information. We validate our approach through an experiment over a data set using a Portuguese variant of a well-known repository, to automatically correct words with spelling errors.},
  note      = {This version is the author accepted manuscript. For information on re-use, please refer to the publisher's terms and conditions.},
}