%
% GENERATED FROM https://www.coli.uni-saarland.de
% by : anonymous
% IP : coli2006.lst.uni-saarland.de
% at : Mon, 05 Feb 2024 15:43:08 +0100 GMT
%
% Selection : Author: Thorsten_Brants
%
% Maintenance notes
%   * One entry per work and one field per line. Citation keys are kept
%     exactly as exported so that existing citations keep resolving.
%   * Page ranges use "--"; MONTH uses the standard three-letter macros.
%   * Words in TITLE that must keep their capitalisation under
%     sentence-casing styles (acronyms, proper nouns, German nouns) are
%     wrapped in braces.
%   * Percent signs inside ABSTRACT are escaped, because BibTeX passes them
%     through literally and TeX would otherwise read them as comment starts.
%   * Several URL and ANNOTE fields hold more than one location separated
%     by blanks, exactly as exported. NOTE(review): styles that typeset URL
%     as a single link may need these split; confirm with the consumer.
%   * The export contained the entry Skut_et_al:1998 twice with identical
%     text; only one copy is kept, since BibTeX rejects repeated keys.
%

@Proceedings{Anne_et_al:2000,
  TITLE       = {Proceedings of the Workshop on Linguistically Interpreted Corpora {LINC-2000}, August 6},
  YEAR        = {2000},
  EDITOR      = {Abeillé, Anne and Brants, Thorsten and Uszkoreit, Hans},
  ADDRESS     = {Luxembourg}
}

@InProceedings{Brants:1994,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Parameteroptimierung für ein Statistisches {Sprachmodell}},
  YEAR        = {1994},
  BOOKTITLE   = {1. Fachtagung der Gesellschaft für Kognitionswissenschaft, 13.-15. Oktober},
  PAGES       = {13--17},
  ADDRESS     = {Freiburg, Germany}
}

@InProceedings{Brants:1995,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Tagset Reduction Without Information Loss},
  YEAR        = {1995},
  BOOKTITLE   = {33rd Annual Meeting of the Association for Computational Linguistics (ACLANNUAL'95)},
  ADDRESS     = {Cambridge, Massachusetts, USA},
  ANNOTE      = {COLIURL : Brants:1995:TRI.pdf}
}

@InProceedings{Brants:1995_1,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Estimating {HMM} Topologies},
  YEAR        = {1995},
  BOOKTITLE   = {Tbilisi Symposium on Language, Logic, and Computation, October 19-22},
  ADDRESS     = {Tbilisi, Georgia}
}

@TechReport{Brants:1995_2,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Some Experiments with the {CRATER} Corpus},
  YEAR        = {1995},
  ADDRESS     = {Saarbrücken},
  TYPE        = {Technical Report},
  INSTITUTION = {Universität des Saarlandes}
}

@InProceedings{Brants:1996,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Better Language Models with Model Merging},
  YEAR        = {1996},
  BOOKTITLE   = {Conference on Empirical Methods in Natural Language Processing (EMNLP '96), May 17-18},
  ADDRESS     = {Philadelphia, USA},
  ANNOTE      = {COLIURL : Brants:1996:BLM.pdf}
}

@InProceedings{Brants:1996_1,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Estimating {Markov} Model Structures},
  YEAR        = {1996},
  BOOKTITLE   = {4th International Conference on Spoken Language Processing (ICSLP'96), October 3-6},
  VOLUME      = {2},
  PAGES       = {893--896},
  EDITOR      = {Bunnell, H. T. and Idsardi, W.},
  ADDRESS     = {Philadelphia, USA}
}

@TechReport{Brants:1996_2,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {{TnT} - A Statistical Part-of-Speech Tagger},
  YEAR        = {1996},
  ADDRESS     = {Saarbrücken},
  TYPE        = {Technical Report},
  INSTITUTION = {Universität des Saarlandes},
  ANNOTE      = {COLIURL : Brants:1996:TSP.pdf}
}

@InProceedings{Brants:1997,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Internal and External Tagsets in Part-of-Speech Tagging},
  YEAR        = {1997},
  BOOKTITLE   = {5th European Conference on Speech Communication and Technology (EUROSPEECH'97), September 22-25},
  VOLUME      = {5},
  PAGES       = {2787--2790},
  EDITOR      = {Kokkinakis, G. and Fakotakis, N. and Dermatas, E.},
  ADDRESS     = {Rhodes, Greece},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/eurospeech97/},
  ABSTRACT    = {We present an approach to statistical part-of-speech tagging that uses two different tagsets, one for its internal and one for its external representation. The internal tagset is used in the underlying Markov model, while the external tagset constitutes the output of the tagger. The internal tagset can be modified and optimized to increase tagging accuracy (with respect to the external tagset). We evaluate this approach in an experiment and show that it performs significantly better than approaches using only one tagset.},
  ANNOTE      = {COLIURL : Brants:1997:IET.pdf}
}

@TechReport{Brants:1998,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {The {NeGra} Export Format for Annotated Corpora},
  YEAR        = {1998},
  MONTH       = apr,
  NUMBER      = {98},
  ADDRESS     = {Saarbrücken},
  TYPE        = {CLAUS-Report},
  INSTITUTION = {Universität des Saarlandes},
  URL         = {ftp://ftp.coli.uni-sb.de/pub/coli/claus/claus98.ps ftp://ftp.coli.uni-sb.de/pub/coli/claus/claus98.dvi},
  ABSTRACT    = {This paper describes the export format version 3 of corpora used in the NeGra project. We use a line-oriented and ASCII-based format that is both easy to read by humans and easy to parse by machines. It is intended for data exchange and for efficient processing with standard Unix tools and C programs.},
  ANNOTE      = {COLIURL : Brants:1998:NEF.pdf Brants:1998:NEF.ps Brants:1998:NEF.dvi}
}

@InCollection{Brants:1998_1,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Estimating Hidden {Markov} Model Topologies},
  YEAR        = {1998},
  BOOKTITLE   = {The Tbilisi Symposium on Logic, Language and Computation: Selected Papers. Studies in Logic, Language and Information},
  PAGES       = {163--176},
  EDITOR      = {Ginzburg, J. and Khasidashvili, Z. and Vogel, C. and Lévy, J.-J. and Vallduví, E.},
  ADDRESS     = {Stanford},
  PUBLISHER   = {CSLI Publications},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Brants-Tbilisi98.ps.gz},
  ANNOTE      = {COLIURL : Brants:1998:EHM.pdf Brants:1998:EHM.ps}
}

@PhdThesis{Brants:1999,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Tagging and Parsing with Cascaded {Markov} Models - Automation of Corpus Annotation. {Saarbrücken} Dissertations in Computational Linguistics and Language Technology, Volume 6.},
  YEAR        = {1999},
  ADDRESS     = {Saarbrücken},
  SCHOOL      = {Universität des Saarlandes},
  URL         = {http://www.dfki.de/lt/diss/diss_en.htm},
  ABSTRACT    = {This thesis presents new techniques for parsing natural language. They are based on Markov Models, which are commonly used in part-of-speech tagging for sequential processing on the word level. We show that Markov Models can be successfully applied to other levels of syntactic processing. First, two classification tasks are handled: the assignment of grammatical functions and the labeling of non-terminal nodes. Then, Markov Models are used to recognize hierarchical syntactic structures. Each layer of a structure is represented by a separate Markov Model. The output of a lower layer is passed as input to a higher layer, hence the name: Cascaded Markov Models. Instead of simple symbols, the states emit partial context-free structures. The new techniques are applied to corpus annotation and partial parsing and are evaluated using corpora of different languages and domains.}
}

@InProceedings{Brants:1999_1,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Cascaded {Markov} Models},
  YEAR        = {1999},
  BOOKTITLE   = {9th Conference of the European Chapter of the Association for Computational Linguistics (EACL '99), June 8-12},
  ADDRESS     = {Bergen, Norway},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Brants-EACL99.ps.gz},
  ABSTRACT    = {This paper presents a new approach to partial parsing of context-free structures. The approach is based on Markov Models. Each layer of the resulting structure is represented by its own Markov Model, and output of a lower layer is passed as input to the next higher layer. An empirical evaluation of the method yields very good results for NP/PP chunking of German newspaper texts.},
  ANNOTE      = {COLIURL : Brants:1999:CMM.pdf Brants:1999:CMM.ps}
}

@InProceedings{Brants:2000,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {Inter-Annotator Agreement for a {German} Newspaper Corpus},
  YEAR        = {2000},
  BOOKTITLE   = {2nd International Conference on Language Resources and Evaluation (LREC-2000), May 31 - June 2},
  ADDRESS     = {Athens, Greece},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Brants-LREC00.ps.gz},
  ABSTRACT    = {This paper presents the results of an investigation on inter-annotator agreement for the NEGRA corpus, consisting of German newspaper texts. The corpus is syntactically annotated with part-of-speech and structural information. Agreement for part-of-speech is 98.6\%, the labeled F-score for structures is 92.4\%. The two annotations are used to create a common final version by discussing differences and by several iterations of cleaning. Initial and final versions are compared. We identify categories causing large numbers of differences and categories that are handled inconsistently.},
  ANNOTE      = {COLIURL : Brants:2000:IAA.pdf Brants:2000:IAA.ps}
}

@InProceedings{Brants:2000_1,
  AUTHOR      = {Brants, Thorsten},
  TITLE       = {{TnT} - A Statistical Part-of-Speech Tagger},
  YEAR        = {2000},
  BOOKTITLE   = {6th Applied Natural Language Processing Conference (ANLP '00), April 29 - May 4},
  PAGES       = {224--231},
  ADDRESS     = {Seattle, Washington, USA},
  PUBLISHER   = {Association for Computational Linguistics},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Brants-ANLP00.ps.gz},
  ABSTRACT    = {Trigrams'n'Tags (TnT) is an efficient statistical part-of-speech tagger. Contrary to claims found elsewhere in the literature, we argue that a tagger based on Markov models performs at least as well as other current approaches, including the Maximum Entropy framework. A recent comparison has even shown that TnT performs significantly better for the tested corpora. We describe the basic model of TnT, the techniques used for smoothing and for handling unknown words. Furthermore, we present evaluations on two corpora.},
  ANNOTE      = {COLIURL : Brants:2000:TSP.pdf Brants:2000:TSP.ps}
}

@InProceedings{Brants_Crocker:2000,
  AUTHOR      = {Brants, Thorsten and Crocker, Matthew W.},
  TITLE       = {Probabilistic Parsing and Psychological Plausibility},
  YEAR        = {2000},
  BOOKTITLE   = {18th International Conference on Computational Linguistics (COLING '00), July 31 - August 4},
  VOLUME      = {1},
  PAGES       = {111--117},
  ADDRESS     = {Saarbrücken, Luxembourg, Nancy},
  PUBLISHER   = {Morgan Kaufmann Publishers},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Brants-Crocker-COLING00.ps.gz},
  ABSTRACT    = {Given the recent evidence for probabilistic mechanisms in models of human ambiguity resolution, this paper investigates the plausibility of exploiting current wide-coverage, probabilistic parsing techniques to model human linguistic performance. In particular, we investigate the performance of standard stochastic parsers when they are revised to operate incrementally, and with reduced memory resources. We present techniques for ranking and filtering analyses, together with experimental results. Our results confirm that stochastic parsers which adhere to these psychologically motivated constraints achieve good performance. Memory can be reduced down to 1\% (compared to exhaustive search) without reducing recall and precision. Additionally, these models exhibit substantially faster performance. Finally, we argue that this general result is likely to hold for more sophisticated, and psycholinguistically plausible, probabilistic parsing models.},
  ANNOTE      = {COLIURL : Brants:2000:PPP.pdf Brants:2000:PPP.ps}
}

@TechReport{Brants_et_al:1997,
  AUTHOR      = {Brants, Thorsten and Hendriks, Roland and Kramp, Sabine and Krenn, Brigitte and Preis, Cordula and Skut, Wojciech and Uszkoreit, Hans},
  TITLE       = {Das {NEGRA-Annotationsschema}},
  YEAR        = {1997},
  ADDRESS     = {Saarbrücken},
  TYPE        = {Negra Project Report},
  INSTITUTION = {Universität des Saarlandes},
  URL         = {https://www.coli.uni-saarland.de/sfb378/negra-corpus/negra-corpus.html},
  ABSTRACT    = {Das vorliegende Annotierschema entstand während des Aufbaus des NEGRA-Korpus. Nach drei Jahren Arbeit (wobei der Aufbau des Korpus nur ein Teilaspekt des Projektes war) liegen 20,000 annotierte Sätze (ca. 350,000 Tokens) sowie diese mehrfach überarbeitete Version des Schemas vor.}
}

@InProceedings{Brants_Plaehn:2000,
  AUTHOR      = {Brants, Thorsten and Plaehn, Oliver},
  TITLE       = {Interactive Corpus Annotation},
  YEAR        = {2000},
  BOOKTITLE   = {2nd International Conference on Language Resources and Evaluation (LREC-2000), May 31 - June 2},
  EDITOR      = {Gavrilidou, M. and Carayannis, G. and Markantonatou, S. and Piperidis, Stelios and Steinhaouer, G.},
  ADDRESS     = {Athens, Greece},
  PUBLISHER   = {European Language Resources Association (ELRA)},
  URL         = {https://www.coli.uni-saarland.de/~plaehn/papers/lrec2000.ps.gz https://www.coli.uni-saarland.de/~plaehn/papers/lrec2000.pdf},
  ABSTRACT    = {We present an easy-to-use graphical tool for syntactic corpus annotation. This tool, Annotate, interacts with a part-of-speech tagger and a parser running in the background. The parser incrementally suggests single phrases bottom-up based on cascaded Markov models. A human annotator confirms or rejects the parser's suggestions. This semi-automatic process facilitates a very rapid and efficient annotation.},
  ANNOTE      = {COLIURL : Brants:2000:ICA.pdf Brants:2000:ICA.ps}
}

@InProceedings{Brants_Samuelsson:1995,
  AUTHOR      = {Brants, Thorsten and Samuelsson, Christer},
  TITLE       = {Tagging the {Teleman} Corpus},
  YEAR        = {1995},
  BOOKTITLE   = {10th Nordic Conference of Computational Linguistics, May 30-31},
  ADDRESS     = {Helsinki, Finland},
  ANNOTE      = {COLIURL : Brants:1995:TTCa.pdf}
}

@TechReport{Brants_Samuelsson:1995_1,
  AUTHOR      = {Brants, Thorsten and Samuelsson, Christer},
  TITLE       = {Tagging the {Teleman} Corpus},
  YEAR        = {1995},
  MONTH       = apr,
  NUMBER      = {54},
  ADDRESS     = {Saarbrücken},
  TYPE        = {CLAUS-Report},
  INSTITUTION = {Universität des Saarlandes},
  URL         = {ftp://ftp.coli.uni-sb.de/pub/coli/claus/claus54.ps},
  ABSTRACT    = {Experiments were carried out comparing the Swedish Teleman and the English Susanne corpora using an HMM-based and a novel reductionistic statistical part-of-speech tagger. They indicate that tagging the Teleman corpus is the more difficult task, and that the performance of the two different taggers is comparable.},
  ANNOTE      = {COLIURL : Brants:1995:TTCb.pdf Brants:1995:TTCb.ps}
}

@InProceedings{Brants_Skut:1998,
  AUTHOR      = {Brants, Thorsten and Skut, Wojciech},
  TITLE       = {Automation of Treebank Annotation},
  YEAR        = {1998},
  BOOKTITLE   = {3rd International Conference on New Methods in Language Processing (NeMLaP'98), January 11-24},
  ADDRESS     = {Sydney, Australia},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/nemlap98/},
  ABSTRACT    = {This paper describes applications of stochastic and symbolic NLP methods to treebank annotation. In particular we focus on (1) the automation of treebank annotation, (2) the comparison of conflicting annotations for the same sentence and (3) the automatic detection of inconsistencies. These techniques are currently employed for building a German treebank.},
  ANNOTE      = {COLIURL : Brants:1998:ATA.pdf}
}

@InProceedings{Brants_et_al:1997_1,
  AUTHOR      = {Brants, Thorsten and Skut, Wojciech and Krenn, Brigitte},
  TITLE       = {Tagging Grammatical Functions},
  YEAR        = {1997},
  BOOKTITLE   = {Conference on Empirical Methods in Natural Language Processing (EMNLP '97)},
  ADDRESS     = {Providence, Rhode Island, USA},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/emnlp97/},
  ABSTRACT    = {This paper addresses issues in automated treebank construction. We show how standard part-of-speech tagging techniques extend to the more general problem of structural annotation, especially for determining grammatical functions and syntactic categories. Annotation is viewed as an interactive process where manual and automatic processing alternate. Efficiency and accuracy results are presented. We also discuss further automation steps.},
  ANNOTE      = {COLIURL : Brants:1997:TGF.pdf}
}

@InProceedings{Brants_et_al:1999,
  AUTHOR      = {Brants, Thorsten and Skut, Wojciech and Uszkoreit, Hans},
  TITLE       = {Syntactic Annotation of a {German} Newspaper Corpus},
  YEAR        = {1999},
  BOOKTITLE   = {ATALA sur le Corpus Annotés pour la Syntaxe Treebanks, June 18-19},
  PAGES       = {69--76},
  EDITOR      = {Abeillé, Anne},
  ADDRESS     = {Paris, France},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Brants-ea-ATALA99.pdf},
  ABSTRACT    = {We report on the syntactic annotation of a German newspaper corpus. The annotation consists of context-free structures, additionally allowing crossing branches, with labeled nodes (phrases) and edges (grammatical functions). Furthermore, we present a new, interactive semi-automatic annotation process that allows efficient and reliable annotations.},
  ANNOTE      = {COLIURL : Brants:1999:SAG.pdf}
}

@InProceedings{Crocker_Brants:1999,
  AUTHOR      = {Crocker, Matthew W. and Brants, Thorsten},
  TITLE       = {Incremental Probabilistic Models of Human Linguistic Performance},
  YEAR        = {1999},
  BOOKTITLE   = {5th Conference on Architectures and Mechanisms for Language Processing (AMLaP '99), September 23-26},
  ADDRESS     = {Edinburgh, Scotland},
  ABSTRACT    = {Models of human language processing increasingly advocate probabilistic mechanisms for parsing and disambiguation (e.g. Jurafsky, 1996; MacDonald et al 1994; Crocker and Corley, to appear). These models resolve local syntactic and lexical ambiguity by promoting the analysis which has the greatest probability of being correct. In this talk we will outline a new probabilistic parsing model which is a generalisation of the Hidden Markov Models which have previously been defended as psychological models of lexical category disambiguation (Corley and Crocker, in press). The model uses layered, or cascaded, Markov models (CMMs) to build up a syntactic analysis (Brants, 1999). In contrast with many probabilistic parsing models, CMMs can easily be implemented to parse incrementally. Incremental CMMs have the property of generating partial structures including hypothetical continuations after receiving each new word in the input. New material is incorporated into the existing structure and ambiguities are resolved based on local context. Alternative hypotheses are assigned probabilities which are used for ranking, and only a bounded number of parallel alternatives are pursued. Simple bounds on the model straightforwardly predict the recency effects often attributed only to connectionist-based models (Stevenson, 1994; MacDonald et al, 1994; Kempen and Vosse, 1987). In contrast with several current models, the combination of weights in CMMs is motivated directly by probability theory. The parameters of the model are acquired automatically from a corpus, and there are relatively few stipulations about how probabilities are combined (contra Jurafsky, 1996; Tanenhaus et al, in press). An important cognitive parameter concerns the number of analyses which are maintained in parallel. We will present results of experiments which evaluate the performance of the model for both general language processing, and on several critical ambiguities where human performance is well understood. The model is a first step in exploring the role of optimal models of human linguistic performance, as motivated by Chater, Crocker, and Pickering (1998). Recently, Pickering, Traxler and Crocker (to appear) have provided experimental evidence which challenges a pure maximum likelihood model of syntactic ambiguity resolution. As an alternative, they propose a measure, termed Informativity, which they derive from a rational analysis of the parsing and interpretation problem. In the final part of the talk we will outline how the presented model can be adapted to implement Informativity, which combines probability with a newly proposed measure termed Specificity.}
}

@Article{Crocker_Brants:2000,
  AUTHOR      = {Crocker, Matthew W. and Brants, Thorsten},
  TITLE       = {Wide Coverage Probabilistic Sentence Processing},
  YEAR        = {2000},
  JOURNAL     = {Journal of Psycholinguistic Research},
  VOLUME      = {29},
  NUMBER      = {6},
  PAGES       = {647--669}
}

@InProceedings{Crocker_Brants:2000_1,
  AUTHOR      = {Crocker, Matthew W. and Brants, Thorsten},
  TITLE       = {Incremental Probabilistic Models of Human Linguistic Performance},
  YEAR        = {2000},
  BOOKTITLE   = {13th Annual CUNY Conference on Sentence Processing, March 30 - April 1},
  ADDRESS     = {La Jolla, California, USA}
}

@InProceedings{Dipper_et_al:2001,
  AUTHOR      = {Dipper, Stefanie and Brants, Thorsten and Lezius, Wolfgang and Plaehn, Oliver and Smith, George},
  TITLE       = {The {TIGER} Treebank},
  YEAR        = {2001},
  BOOKTITLE   = {3rd Workshop on Linguistically Interpreted Corpora (LINC '01), August 29},
  ADDRESS     = {Leuven, Belgium},
  URL         = {http://www.ims.uni-stuttgart.de/projekte/TIGER/paper/linc2001-abstract-tiger.ps.gz http://www.ims.uni-stuttgart.de/projekte/TIGER/paper/linc2001-abstract-tiger.pdf},
  ANNOTE      = {COLIURL : Dipper:2001:TT.pdf Dipper:2001:TT.ps}
}

@TechReport{Kipper_et_al:1995,
  AUTHOR      = {Kipper, Bernhard and Brants, Thorsten and Plach, Marcus and Schäfer, Ralph},
  TITLE       = {{Bayessche} {Netze}: Ein einführendes {Beispiel}},
  YEAR        = {1995},
  NUMBER      = {4},
  ADDRESS     = {Saarbrücken},
  TYPE        = {Bericht des Graduiertenkolleg Kognitionswissenschaft},
  INSTITUTION = {Universität des Saarlandes},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Kipper-ea-GKK4.pdf},
  ABSTRACT    = {Bayessche Netze stellen einen vielbeachteten Formalismus zur Repräsentation und Verarbeitung von unsicherem Wissen dar. Zum Formalismus der Bayesschen Netze existieren zwar einige einführende Arbeiten; was diesen Einführungen jedoch fehlt, ist eine Illustration der innerhalb von Bayesschen Netzen verwendeten Mechanismen an Hand konkreter (Zahlen-)Beispiele. Mit der vorliegenden Arbeit soll genau diese Lücke geschlossen werden: Die grundlegende Struktur Bayesscher Netze wird durch die Modellierung eines Beispielszenarios erläutert. In dem daraus resultierenden Beispielnetz werden ferner die probabilistischen Methoden, die bei Bayesschen Netzen Anwendung finden, mit konkreten Zahlenwerten durchgerechnet.},
  ANNOTE      = {COLIURL : Kipper:1995:BNE.pdf}
}

@InProceedings{Krenn_et_al:1998,
  AUTHOR      = {Krenn, Brigitte and Brants, Thorsten and Skut, Wojciech and Uszkoreit, Hans},
  TITLE       = {Recent Advances in Corpus Annotation},
  YEAR        = {1998},
  BOOKTITLE   = {Proceedings of the 10th European Summer School in Logic, Language and Information (ESSLLI'98). Workshop on Automated Acquisition of Syntax and Parsing, August 17-28},
  ADDRESS     = {Saarbrücken, Germany},
  URL         = {http://www.dfki.de/dfkibib/publications/docs/Krenn_1998_RACA.pdf},
  ANNOTE      = {COLIURL : Krenn:1998:RAC.pdf}
}

@InProceedings{Plaehn_Brants:2000,
  AUTHOR      = {Plaehn, Oliver and Brants, Thorsten},
  TITLE       = {Annotate - An Efficient Interactive Annotation Tool},
  YEAR        = {2000},
  BOOKTITLE   = {6th Applied Natural Language Processing Conference (ANLP '00), April 29 - May 4},
  ADDRESS     = {Seattle, Washington, USA},
  URL         = {https://www.coli.uni-saarland.de/~plaehn/papers/anlp2000.ps.gz https://www.coli.uni-saarland.de/~plaehn/papers/anlp2000.pdf https://www.coli.uni-saarland.de/~plaehn/papers/anlp2000.html},
  ABSTRACT    = {During the creation of the NEGRA corpus, we developed very efficient interactive annotation tools. An easy-to-use graphical tool, Annotate, is used to manipulate syntactic structures. Annotate interacts with a part-of-speech tagger and a parser running in the background, thus facilitating rapid semi-automatic corpus annotation.},
  ANNOTE      = {COLIURL : Plaehn:2000:AEI.pdf Plaehn:2000:AEI.ps}
}

@InProceedings{Skut_Brants:1998,
  AUTHOR      = {Skut, Wojciech and Brants, Thorsten},
  TITLE       = {A Maximum-Entropy Partial Parser for Unrestricted Text},
  YEAR        = {1998},
  BOOKTITLE   = {6th Workshop on Very Large Corpora, August 15-16},
  PAGES       = {143--151},
  ADDRESS     = {Montreal, Canada},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/wvlc98/},
  ABSTRACT    = {This paper describes a partial parser that assigns syntactic structures to sequences of part-of-speech tags. The program uses the maximum entropy parameter estimation method, which allows a flexible combination of different knowledge sources: the hierarchical structure, parts of speech and phrasal categories. In effect, the parser goes beyond simple bracketing and recognises even fairly complex structures. We give accuracy figures for different applications of the parser.},
  ANNOTE      = {COLIURL : Skut:1998:MEP.pdf}
}

@InProceedings{Skut_Brants:1998_1,
  AUTHOR      = {Skut, Wojciech and Brants, Thorsten},
  TITLE       = {Chunk Tagger - Statistical Recognition of Noun Phrases},
  YEAR        = {1998},
  BOOKTITLE   = {ESSLLI Workshop on Automated Acquisition of Syntax and Parsing, August 17-28},
  ADDRESS     = {Saarbrücken, Germany},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/esslli98-parsing/},
  ABSTRACT    = {We describe a stochastic approach to partial parsing, i.e., the recognition of syntactic structures of limited depth. The technique utilises Markov Models, but goes beyond usual bracketing approaches, since it is capable of recognising not only the boundaries, but also the internal structure and syntactic category of simple as well as complex NP's, PP's, AP's and adverbials. We compare tagging accuracy for different applications and encoding schemes.},
  ANNOTE      = {COLIURL : Skut:1998:CTS.pdf}
}

@InProceedings{Skut_et_al:1997,
  AUTHOR      = {Skut, Wojciech and Brants, Thorsten and Krenn, Brigitte and Uszkoreit, Hans},
  TITLE       = {Annotating Unrestricted {German} Text},
  YEAR        = {1997},
  BOOKTITLE   = {6. Fachtagung der Sektion Computerlinguistik der Deutschen Gesellschaft für Sprachwissenschaft (DGfS/CL 97), 8.-10. Oktober},
  ADDRESS     = {Heidelberg, Germany},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Skut-ea-DGfS97.pdf https://www.coli.uni-saarland.de/~thorsten/publications/Skut-ea-DGfS97.ps.gz},
  ANNOTE      = {COLIURL : Skut:1997:AUG.pdf Skut:1997:AUG.ps}
}

@InProceedings{Skut_et_al:1998,
  AUTHOR      = {Skut, Wojciech and Brants, Thorsten and Krenn, Brigitte and Uszkoreit, Hans},
  TITLE       = {A Linguistically Interpreted Corpus of {German} Newspaper Text},
  YEAR        = {1998},
  BOOKTITLE   = {Proceedings of the 10th European Summer School in Logic, Language and Information (ESSLLI'98). Workshop on Recent Advances in Corpus Annotation, August 17-28},
  EDITOR      = {Krenn, Brigitte and Brants, Thorsten and Skut, Wojciech and Uszkoreit, Hans},
  ADDRESS     = {Saarbrücken, Germany},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Skut-ea-ESSLLI-Corpus98.pdf https://www.coli.uni-saarland.de/~thorsten/publications/Skut-ea-ESSLLI-Corpus98.ps.gz http://www.dfki.de/dfkibib/publications/docs/Krenn_1998_RACA},
  ABSTRACT    = {In this paper, we report on the development of an annotation scheme and annotation tools for unrestricted German text. Our representation format is based on argument structure, but also permits the extraction of other kinds of representations. We discuss several methodological issues and the analysis of some phenomena. Additional focus is on the tools developed in our project and their applications.},
  ANNOTE      = {COLIURL : Skut:1998:LIC.pdf Skut:1998:LIC.ps}
}

@InProceedings{Skut_et_al:1997_1,
  AUTHOR      = {Skut, Wojciech and Krenn, Brigitte and Brants, Thorsten and Uszkoreit, Hans},
  TITLE       = {An Annotation Scheme for Free Word Order Languages},
  YEAR        = {1997},
  BOOKTITLE   = {Proceedings of the 5th Conference on Applied Natural Language Processing (ANLP'97), March 31 - April 3},
  EDITOR      = {Jacobs, Paul},
  ADDRESS     = {Washington D.C., USA},
  PUBLISHER   = {Morgan Kaufmann Publishers},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Skut-ea-ANLP97.pdf https://www.coli.uni-saarland.de/~thorsten/publications/Skut-ea-ANLP97.ps.gz},
  ABSTRACT    = {We describe an annotation scheme and a tool developed for creating linguistically annotated corpora for non-configurational languages. Since the requirements for such a formalism differ from those posited for configurational languages, several features have been added, influencing the architecture of the scheme. The resulting scheme reflects a stratificational notion of language, and makes only minimal assumptions about the interrelation of the particular representational strata.},
  ANNOTE      = {COLIURL : Skut:1997:ASF.pdf Skut:1997:ASF.ps}
}

@Article{Uszkoreit_et_al:1998,
  AUTHOR      = {Uszkoreit, Hans and Brants, Thorsten and Duchier, Denys and Krenn, Brigitte and Konieczny, Lars and Oepen, Stephan and Skut, Wojciech},
  TITLE       = {Studien zur performanzorientierten {Linguistik}: {Aspekte} der {Relativsatzextraposition} im {Deutschen}},
  YEAR        = {1998},
  JOURNAL     = {Kognitionswissenschaft},
  VOLUME      = {7},
  NUMBER      = {3},
  PAGES       = {129--133},
  ABSTRACT    = {Am Beispiel der Relativsatzextraposition im Deutschen zeigt das Papier wie Verfahren der sprachwissenschaftlichen Modellbildung, korpuslinguistischen Untersuchung und des psycholinguistischen Experiments in einem integrativen Forschungsansatz zusammenwirken, der auf ein verbessertes Verständnis und die linguistisch wie kognitiv adäquate Modellierung sprachlicher Performanzprobleme zielt. Ausgehend von der von Hawkins (1994) formulierten Theorie zur Wortstellung werden Hypothesen über die positionelle Verteilung von Relativsätzen formuliert und in Bezug auf Korpusdaten und Akzeptabilitätsmessungen überprüft. Alle beschriebenen empirischen Untersuchungen bestätigen den erwarteten Einfluss von Längenfaktoren auf die Relativsatzdistribution, zeigen gleichzeitig aber eine interessante Asymmetrie zwischen Produktions- und Rezeptionsdaten.}
}

@TechReport{Uszkoreit_et_al:1998_1,
  AUTHOR      = {Uszkoreit, Hans and Brants, Thorsten and Duchier, Denys and Krenn, Brigitte and Konieczny, Lars and Oepen, Stephan and Skut, Wojciech},
  TITLE       = {Studien zur performanzorientierten {Linguistik}. {Aspekte} der {Relativsatzextraposition} im {Deutschen}},
  YEAR        = {1998},
  MONTH       = apr,
  NUMBER      = {99},
  ADDRESS     = {Saarbrücken},
  TYPE        = {CLAUS-Report},
  INSTITUTION = {Universität des Saarlandes},
  URL         = {https://www.coli.uni-saarland.de/~thorsten/publications/Uszkoreit-ea-CLAUS99.pdf ftp://ftp.coli.uni-sb.de/pub/coli/claus/claus99.ps ftp://ftp.coli.uni-sb.de/pub/coli/claus/claus99.dvi},
  ABSTRACT    = {Am Beispiel der Relativsatzextraposition im Deutschen zeigt das Papier wie Verfahren der sprachwissenschaftlichen Modellbildung, korpuslinguistischen Untersuchung und des psycholinguistischen Experiments in einem integrativen Forschungsansatz zusammenwirken, der auf ein verbessertes Verständnis und die linguistisch wie kognitiv adäquate Modellierung sprachlicher Performanzprobleme zielt. Ausgehend von der von Hawkins (1994) formulierten Theorie zur Wortstellung werden Hypothesen über die positionelle Verteilung von Relativsätzen formuliert und in Bezug auf Korpusdaten und Akzeptabilitätsmessungen überprüft. Alle beschriebenen empirischen Untersuchungen bestätigen den erwarteten Einfluß von Längenfaktoren auf die Relativsatzdistribution, zeigen gleichzeitig aber eine interessante Asymmetrie zwischen Produktions- und Rezeptionsdaten. Eine gekürzte Fassung erscheint in Kognitionswissenschaft, Themenheft SFB 378, 1998.},
  ANNOTE      = {COLIURL : Uszkoreit:1998:SPLb.pdf Uszkoreit:1998:SPLb.ps}
}

@Proceedings{Hans_et_al:1999,
  TITLE       = {Linguistically Interpreted Corpora. Proceedings of the Workshop {LINC-1999} at the 9th Conference of the {European Chapter of the Association for Computational Linguistics}},
  YEAR        = {1999},
  EDITOR      = {Uszkoreit, Hans and Brants, Thorsten and Krenn, Brigitte},
  ADDRESS     = {Bergen, Norway}
}