@InProceedings{Crocker_Brants:1999,
AUTHOR = {Crocker, Matthew W. and Brants, Thorsten},
TITLE = {Incremental Probabilistic Models of Human Linguistic Performance},
YEAR = {1999},
BOOKTITLE = {5th Conference on Architectures and Mechanisms for Language Processing (AMLaP '99), September 23-26},
ADDRESS = {Edinburgh, Scotland},
ABSTRACT = {Models of human language processing increasingly advocate probabilistic mechanisms for parsing and disambiguation (e.g., Jurafsky, 1996; MacDonald et al., 1994; Crocker and Corley, to appear). These models resolve local syntactic and lexical ambiguity by promoting the analysis which has the greatest probability of being correct. In this talk we will outline a new probabilistic parsing model which is a generalisation of the Hidden Markov Models which have previously been defended as psychological models of lexical category disambiguation (Corley and Crocker, in press). The model uses layered, or cascaded, Markov models (CMMs) to build up a syntactic analysis (Brants, 1999). In contrast with many probabilistic parsing models, CMMs can easily be implemented to parse incrementally. Incremental CMMs have the property of generating partial structures, including hypothetical continuations, after receiving each new word in the input. New material is incorporated into the existing structure and ambiguities are resolved based on local context. Alternative hypotheses are assigned probabilities which are used for ranking, and only a bounded number of parallel alternatives are pursued. Simple bounds on the model straightforwardly predict the recency effects often attributed only to connectionist-based models (Stevenson, 1994; MacDonald et al., 1994; Kempen and Vosse, 1987). In contrast with several current models, the combination of weights in CMMs is motivated directly by probability theory. The parameters of the model are acquired automatically from a corpus, and there are relatively few stipulations about how probabilities are combined (contra Jurafsky, 1996; Tanenhaus et al., in press). An important cognitive parameter concerns the number of analyses which are maintained in parallel. We will present results of experiments which evaluate the performance of the model both on general language processing and on several critical ambiguities where human performance is well understood. The model is a first step in exploring the role of optimal models of human linguistic performance, as motivated by Chater, Crocker, and Pickering (1998). Recently, Pickering, Traxler and Crocker (to appear) have provided experimental evidence which challenges a pure maximum likelihood model of syntactic ambiguity resolution. As an alternative, they propose a measure, termed Informativity, which they derive from a rational analysis of the parsing and interpretation problem. In the final part of the talk we will outline how the presented model can be adapted to implement Informativity, which combines probability with a newly proposed measure termed Specificity.} }
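Note (not part of the original entry): the abstract describes incremental disambiguation with a bounded number of ranked parallel hypotheses only at a high level. The Python sketch below is a minimal illustration of that general idea, a beam-bounded, HMM-style lexical category disambiguator, and not the authors' CMM parser; every probability table, category label, word, and the beam width k is invented here for demonstration.

    # Illustrative sketch only: beam-bounded incremental disambiguation.
    # All probabilities and categories below are toy values, not from the paper.
    import math

    # Invented P(tag | previous tag) and P(word | tag) tables.
    TRANS = {
        ("<s>", "Det"): 0.6, ("<s>", "N"): 0.4,
        ("Det", "N"): 0.9, ("Det", "Adj"): 0.1,
        ("Adj", "N"): 1.0,
        ("N", "V"): 0.7, ("N", "N"): 0.3,
        ("V", "Det"): 0.8, ("V", "N"): 0.2,
    }
    EMIT = {
        ("Det", "the"): 0.7, ("Det", "a"): 0.3,
        ("N", "dog"): 0.4, ("N", "park"): 0.3, ("N", "walks"): 0.3,
        ("V", "walks"): 0.6, ("V", "saw"): 0.4,
        ("Adj", "old"): 1.0,
    }
    TAGS = ["Det", "N", "V", "Adj"]

    def advance(beam, word, k=3):
        """Extend each hypothesis with every category licensed for `word`,
        rescore it, and keep only the k best: the bounded parallelism."""
        extended = []
        for tags, logp in beam:
            prev = tags[-1] if tags else "<s>"
            for t in TAGS:
                p = TRANS.get((prev, t), 0.0) * EMIT.get((t, word), 0.0)
                if p > 0.0:
                    extended.append((tags + [t], logp + math.log(p)))
        extended.sort(key=lambda h: h[1], reverse=True)
        return extended[:k]

    beam = [([], 0.0)]  # a single empty hypothesis before any input
    for w in ["the", "old", "dog", "walks"]:
        beam = advance(beam, w)  # incorporate each new word incrementally
        print(w, "->", [(tags, round(lp, 2)) for tags, lp in beam])

With k = 1 the disambiguator commits greedily after each word; a larger k maintains more parallel analyses, corresponding roughly to the cognitive parameter the abstract mentions (the number of analyses maintained in parallel).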