%
% GENERATED FROM https://www.coli.uni-saarland.de
% by : anonymous
% IP : coli2006.lst.uni-saarland.de
% at : Mon, 05 Feb 2024 15:43:04 +0100 GMT
%
% Selection : Author: Diana_Raileanu
%

@InProceedings{Buitelaar_et_al:2001,
  AUTHOR    = {Buitelaar, Paul and Alexandersson, Jan and Jaeger, Tilman and Lesch, Stephan and Pfleger, Norbert and Raileanu, Diana and von den Berg, Tanja and Klöckner, Kerstin and Neis, Holger and Schlarb, Hubert},
  TITLE     = {An Unsupervised Semantic Tagger Applied to {German}},
  YEAR      = {2001},
  BOOKTITLE = {Proceedings of the 3rd Conference on Recent Advances in Natural Language Processing (RANLP'01), September 5-7},
  ADDRESS   = {Tzigov Chark, Bulgaria},
  URL       = {ftp://lt-ftp.dfki.uni-sb.de/pub/papers/local/ranlp01.ps ftp://lt-ftp.dfki.uni-sb.de/pub/papers/local/ranlp01.pdf http://dfki.de/~paulb/ranlp01.pdf},
  ABSTRACT  = {We describe an unsupervised semantic tagger, applied to German, but which could be used with any language for which a corresponding XNet (WordNet, GermaNet, etc.), POS tagger and morphological analyzer are available. Disambiguation is performed by comparing co-occurrence weights on pairs of semantic classes (synsets from GermaNet). Precision is around 67\% at a recall of around 65\% (for all ambiguous words -- 81\% for all words at a recall of 80\%). Our results show the influence of context size and of semantic class frequency in the training corpus.},
  ANNOTE    = {COLIURL : Buitelaar:2001:UST.pdf Buitelaar:2001:UST.ps}
}

@InProceedings{Raileanu_et_al:2002,
  AUTHOR    = {Raileanu, Diana and Buitelaar, Paul and Vintar, Špela and Bay, Jörg},
  TITLE     = {Evaluation Corpora for Sense Disambiguation in the Medical Domain},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the 3rd International Conference on Language Resources and Evaluation (LREC'02), May 29-31},
  ADDRESS   = {Las Palmas, Canary Islands, Spain},
  URL       = {http://dfki.de/~paulb/lrec2002.eval.ps http://www.dfki.de/dfkibib/publications/docs/lrec2002.eval.final.pdf http://www2.arnes.si/~svinta/lrec2002.eval.final.pdf},
  ABSTRACT  = {An important aspect of word sense disambiguation is the evaluation of different methods and parameters. Unfortunately, there is a lack of test sets for evaluation, specifically for languages other than English and even more so for specific domains like medicine. Given that our work focuses on English as well as German text in the medical domain, we had to develop our own evaluation corpora in order to test our disambiguation methods. In this paper we describe the work on developing these corpora, using GermaNet and UMLS as (lexical) semantic resources, next to a description of the annotation tool KiC that we developed for support of the annotation task.},
  ANNOTE    = {COLIURL : Raileanu:2002:ECS.pdf Raileanu:2002:ECS.ps}
}

@InProceedings{Vintar_et_al:2002,
  AUTHOR    = {Vintar, Špela and Buitelaar, Paul and Ripplinger, Bärbel and Sacaleanu, Bogdan and Raileanu, Diana and Prescher, Detlef},
  TITLE     = {An Efficient and Flexible Format for Linguistic and Semantic Annotation},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the 3rd International Conference on Language Resources and Evaluation (LREC'02), May 29-31},
  ADDRESS   = {Las Palmas, Canary Islands, Spain},
  URL       = {http://dfki.de/~paulb/lrec2002.dtd.ps http://www.dfki.de/dfkibib/publications/docs/lrec2002.dtd.final.pdf},
  ABSTRACT  = {The paper describes an XML annotation format and tool developed within the MUCHMORE project. The annotation scheme was designed specifically for the purposes of Cross-Lingual Information Retrieval in the medical domain so as to allow both efficient and flexible access to layers of information. We use a parallel English-German corpus of medical abstracts and annotate it with linguistic information (tokenisation, part-of-speech tagging, lemmatisation and decomposition, phrase recognition, grammatical functions) as well as semantic information from various sources. The annotation of medical terms/concepts, semantic types and semantic relations is based on the Unified Medical Language System (UMLS). Additionally, we use EuroWordNet as a general-language resource in annotating word senses and to compare domain-specific and general language use. A major aim of the project is also to complement existing ontological resources by extracting new terms and new semantic relations. We present the annotation scheme, which is conceptually related to stand-off annotation, and describe our tool for automatic semantic annotation.},
  ANNOTE    = {COLIURL : Vintar:2002:EFF.pdf Vintar:2002:EFF.ps}
}

@Article{Volk_et_al:2002,
  AUTHOR    = {Volk, Martin and Ripplinger, Bärbel and Vintar, Špela and Buitelaar, Paul and Raileanu, Diana and Sacaleanu, Bogdan},
  TITLE     = {Semantic Annotation for Concept-Based Cross-Language Medical Information Retrieval},
  YEAR      = {2002},
  JOURNAL   = {International Journal of Medical Informatics},
  VOLUME    = {67},
  NUMBER    = {1--3},
  PAGES     = {97--112},
  URL       = {http://dfki.de/~paulb/jmi.pdf},
  ABSTRACT  = {We present a framework for concept-based cross-language information retrieval in the medical domain, which is under development in the MUCHMORE project. Our approach is based on using the Unified Medical Language System (UMLS) as the primary source of semantic data. Documents and queries are annotated with multiple layers of linguistic information. Linguistic processing includes part-of-speech tagging, morphological analysis, phrase recognition and the identification of medical terms and semantic relations between them. The paper describes experiments in monolingual and cross-language document retrieval, performed on a corpus of medical abstracts. Results show that linguistic processing, especially lemmatization and compound analysis for German, is a crucial step in achieving a good baseline performance. On the other hand, they show that semantic information, specifically the combined use of concepts and relations, increases the performance in monolingual and cross-language retrieval.},
  ANNOTE    = {COLIURL : Volk:2002:SAC.pdf}
}

@MastersThesis{Raileanu:2002,
  AUTHOR    = {Raileanu, Diana},
  TITLE     = {Semantic Tagging of Medical Texts with {GermaNet}},
  YEAR      = {2002},
  ADDRESS   = {Saarbrücken, Germany},
  SCHOOL    = {Computerlinguistik, Universität des Saarlandes},
  TYPE      = {Diplomarbeit}
}