Frank Keller,
Mirella Lapata and
Olga Ourioupina. Using the Web to Overcome Data Sparseness. In
Jan Hajic and
Yuji Matsumoto, editors, Proceedings of the Conference on Empirical Methods in Natural Language Processing, pages 230-237, Philadelphia, USA, 2002. [Abstract] [Annote]
% Conference paper entry. PAGES uses the BibTeX range form (double hyphen).
% ANNOTE carries a COLIURL file reference and is kept verbatim.
@InProceedings{Keller_et_al:2002,
  AUTHOR    = {Keller, Frank and Lapata, Mirella and Ourioupina, Olga},
  TITLE     = {Using the Web to Overcome Data Sparseness},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  PAGES     = {230--237},
  EDITOR    = {Hajic, Jan and Matsumoto, Yuji},
  ADDRESS   = {Philadelphia, USA},
  URL       = {http://www.iccs.informatics.ed.ac.uk/~keller/papers/emnlp02.pdf},
  ABSTRACT  = {This paper shows that the web can be employed to obtain frequencies for bigrams that are unseen in a given corpus. We describe a method for retrieving counts for adjective-noun, noun-noun, and verb-object bigrams from the web by querying a search engine. We evaluate this method by demonstrating that web frequencies correlate with frequencies obtained from a carefully edited, balanced corpus. We also perform a task-based evaluation, showing that web frequencies can reliably predict human plausibility judgments.},
  ANNOTE    = {COLIURL : Keller:2002:UWO.pdf}
}
|