%
% GENERATED FROM https://www.coli.uni-saarland.de
% by : anonymous
% IP : coli2006.lst.uni-saarland.de
% at : Mon, 05 Feb 2024 15:41:48 +0100 GMT
%
% Selection : Author: Josef_van-Genabith
%
% Maintenance notes (text outside entries is ignored by the parser):
%  - Citation keys must not contain whitespace. Two keys that were exported
%    with an embedded blank now use an underscore instead:
%      Frank_van_Genabith:2001   and   van_Genabith_et_al:2001
%  - Page ranges use a double hyphen; months use the standard three-letter
%    macros (unbraced) so that the bibliography style controls rendering.
%  - Acronyms and proper nouns in TITLE fields are brace-protected so that
%    sentence-casing styles do not lowercase them.
%  - ABSTRACT, ANNOTE and the COLIURL annotations are informational fields
%    that standard styles do not print.
%

@InCollection{Crouch_et_al:2001,
  AUTHOR    = {Crouch, Richard and Frank, Anette and van Genabith, Josef},
  TITLE     = {Glue, Underspecification and Translation},
  YEAR      = {2001},
  BOOKTITLE = {Computing Meaning Volume 2},
  EDITOR    = {Bunt, Harry},
  PUBLISHER = {Kluwer Academic Publishers},
  URL       = {http://www2.parc.com/istl/members/crouch/iwcs3.pdf},
  ABSTRACT  = {This paper sketches how one can construct Underspecified Discourse Representation
               Structures (UDRSs) (Reyle, 1993) via glue semantics (Dalrymple et al. 1999). In most
               cases, UDRSs are constructed in linear time, analogously to the linear time
               construction of skeleton-modifier representations presented in (Gupta and Lamping,
               1998). We show how this encoding can be used in ambiguity preserving, transfer-based
               machine translation, where it reduces problems with structural misalignment, such as
               head-switching problem.},
  ANNOTE    = {COLIURL : Crouch:2001:GUT.pdf}
}

@InProceedings{Crouch_et_al:2001_1,
  AUTHOR    = {Crouch, Richard and Frank, Anette and van Genabith, Josef},
  TITLE     = {Linear Logic based Transfer and Structural Misalignment},
  YEAR      = {2001},
  BOOKTITLE = {4th International Workshop on Computational Semantics (IWCS-4), January 10-12},
  EDITOR    = {Bunt, Harry and van der Sluis, Ielka and Thijsse, Elias},
  ADDRESS   = {Tilburg, The Netherlands},
  URL       = {http://www.dfki.de/~frank/papers/iwcs4.ps},
  ABSTRACT  = {(Genabith, Frank and Dorna, 1998) described an approach to ambiguity preserving
               machine translation, where transfer takes place on the glue language meaning
               constructors of (Dalrymple et al. 1996). Unfortunately, it did not deal with
               structural misalignment problems, such as embedded head switching, in a fully
               satisfactory way.
               This paper proposes the use of a fragment of linear logic as a transfer formalism,
               and shows how it provides a more general and satisfactory solution to the
               difficulties encountered by (Genabith, Frank and Dorna, 1998).},
  ANNOTE    = {COLIURL : Crouch:2001:LLB.pdf Crouch:2001:LLB.ps}
}

@InProceedings{Frank_van_Genabith:2001,
  AUTHOR    = {Frank, Anette and van Genabith, Josef},
  TITLE     = {{LL}-based Semantics for {LTAG} - and what it teaches us about {LFG} and {LTAG}},
  YEAR      = {2001},
  BOOKTITLE = {Proceedings of the 6th International Lexical Functional Grammar Conference (LFG'01), June 25-27},
  EDITOR    = {Butt, Miriam and King, Tracy Holloway},
  ADDRESS   = {Hong Kong},
  PUBLISHER = {CSLI Online Publications},
  URL       = {http://csli-publications.stanford.edu/LFG/6/lfg01frankgenabith.pdf},
  ABSTRACT  = {We review existing approaches to semantics construction in LTAG (Lexicalised Tree
               Adjoining Grammar) based on the notion of derivation (tree)s. We argue that
               derivation structures in LTAG are not appropriate to guide semantic composition, due
               to a non-isomorphism, in LTAG between the syntactic operation of adjunction on the
               one hand, and the semantic operations of complementation and modification, on the
               other. Linear Logic based glue semantics, as developed within the LFG framework (cf.
               Dalrymple (1999)), allows for flexible coupling of syntactic and semantic structure.
               We investigate application of glue semantics to LTAG syntax, using as underlying
               structure the derived tree, which is more appropriate for principle-based semantics
               construction. We show how Linear Logic based semantics construction helps to bridge
               the non-isomorphism between syntactic and semantic operations in LTAG. The glue
               approach captures non-tree local dependencies in control and modification
               structures, and extends to the treatment of scope ambiguity with quantified NPs and
               VP modifiers.
               Finally, glue semantics applies successfully to the adjunction-based analysis of
               long-distance dependencies in LTAG, which differs significantly from the f-structure
               based analysis in LFG.},
  ANNOTE    = {COLIURL : Frank:2001:LBS.pdf}
}

@InProceedings{van_Genabith_et_al:2001,
  AUTHOR    = {van Genabith, Josef and Frank, Anette and Way, Andy},
  TITLE     = {Treebank vs. {X-bar} based Automatic F-Structure Annotation},
  YEAR      = {2001},
  BOOKTITLE = {Proceedings of the 6th International Lexical Functional Grammar Conference (LFG'01), June 25-27},
  EDITOR    = {Butt, Miriam and King, Tracy Holloway},
  ADDRESS   = {Hong Kong},
  PUBLISHER = {CSLI Online Publications},
  URL       = {http://csli-publications.stanford.edu/LFG/6/lfg01genabithfrankway.pdf},
  ABSTRACT  = {Manual, large scale (computational) grammar development is time consuming, expensive
               and requires lots of linguistic expertise. More recently, a number of alternatives
               based on treebank resources (such as Penn-II, Susanne, AP treebank) have been
               explored. The idea is to automatically “induce” or rather read off (P)CFG grammars
               from the parse annotated treebank resources and to use the treebank grammars thus
               obtained in (probabilistic) parsing or as a starting point for further grammar
               development. The approach is cheap, fast, automatic, large scale, “data driven” and
               based on real language resources. Treebank grammars typically involve large sets of
               lexical tags and non-lexical categories as syntactic information tends to be encoded
               in monadic category symbols. They feature flat rules (trees) that can “underspecify”
               attachment possibilities. Treebank grammars do not in general follow Xbar
               architectural design principles (this is not to say that treebank grammars do not
               have design principles). As a consequence, treebank grammars tend to have very large
               CFG rule bases (e.g. Penn-II > 17,000 CFG rules for about 1 million words of text)
               with often only minimally differing rules.
               Even though treebank grammars are large, they are still incomplete, exhibiting
               unabated rule accession rates. From a grammar engineering point of view, the size of
               the rule base poses problems for maintainability, extendability and, if a treebank
               grammar is to be used as a CF-base in a LFG grammar, for functional
               (feature-structure) annotations. From the point of view of theoretical linguistics,
               flat treebank trees and treebank grammars extracted from such trees do not express
               linguistic generalisations. From the perspective of empirical and corpus
               linguistics, flat trees are well-motivated as they allow underspecification of
               subtle and often time consuming attachment decisions. Indeed, it is sometimes
               doubted whether highly general Xbar schemata usefully scale to “real” language. In
               previous work we developed methodologies for automatic feature-structure annotation
               of grammars extracted from treebanks. Automatic annotation of “raw” treebank
               grammars is difficult as annotation rules often need to identify subsequences in the
               RHSs of flat treebank rules as they explicitly encode head, complement and modifier
               relations. Xbar-based CFG rules should substantially facilitate automatic
               feature-structure annotation of grammar rules. In the present paper we conduct a
               number of experiments to explore a space of possible grammars based on a small
               fragment of the AP treebank resource. Starting with the original treebank fragment
               we automatically extract a CFG G. We then apply an automatic structure preserving
               grammar compaction step which generalises categories in the original treebank
               fragment and reduces the number of rules extracted, resulting in a generalised
               treebank fragment and in a compacted grammar Gc. The generalised fragment is then
               manually corrected to catch missed constituents (and the like) resulting in an
               automatically extracted, compacted and (effectively manually) corrected grammar
               Gc;m.
               Manual correction proceeds in the “spirit” of treebank grammars (we do not introduce
               Xbar analyses). We then explore how many of the manual correction steps on treebank
               trees can be achieved automatically. We develop, implement and test an automatic
               treebank “grooming” methodology which is applied to the generalised treebank
               fragment to yield a compacted and automatically corrected grammar Gc;a. Grammars
               Gc;m and Gc;a are very similar to compiled out “flat” LFG-82 style grammars. We
               explore regular expression based compaction (both manual and automatic) to relate
               Gc;m to a LFG-82 style grammar design. Finally, we manually recode a subsection of
               the generalised and manually corrected treebank fragment into “vanilla-flavour” Xbar
               based trees. From these we extract a compacted, manually corrected, Xbar-based
               grammar Gc;m;x. We evaluate our grammars and methods using standard labelled
               bracketing measures and according to how well they perform under automatic
               feature-structure annotation tasks.},
  ANNOTE    = {COLIURL : Genabith:19xx:TVX.pdf}
}

@InProceedings{KüReGe2009,
  AUTHOR    = {Kübler, Sandra and Rehbein, Ines and van Genabith, Josef},
  TITLE     = {A testsuite for testing parser performance on complex {German} grammatical constructions},
  YEAR      = {2009},
  BOOKTITLE = {Proceedings of the Seventh International Workshop on Treebanks and Linguistic Theories : Groningen, Netherlands, January 23-24, 2009},
  NOTE      = {MP}
}

@InProceedings{ReGe2009,
  AUTHOR    = {Rehbein, Ines and van Genabith, Josef},
  TITLE     = {Automatic acquisition of {LFG} resources for {German} — as good as it gets},
  YEAR      = {2009},
  BOOKTITLE = {Proceedings of LFG09: 13-16 July 2009, Cambridge, UK / 14th Lexical Functional Grammar Conference},
  PAGES     = {480--500},
  ADDRESS   = {Stanford, CA},
  PUBLISHER = {CSLI Publications},
  NOTE      = {MP}
}

@InProceedings{ChDiGe2008,
  AUTHOR       = {Chrupala, Grzegorz and Dinu, Georgiana and van Genabith, Josef},
  TITLE        = {Learning morphology with morfette},
  YEAR         = {2008},
  BOOKTITLE    = {Proceedings of the 6th International Conference on Language Resources and Evaluation : LREC 2008, May 26 - June 1, 2008, Palais des congrès Mansour Eddahbi, Marrakech, Morocco},
  PAGES        = {2362--2367},
  ADDRESS      = {Paris},
  ORGANIZATION = {ELRA},
  NOTE         = {DK, MP}
}

@InProceedings{SeChCeVaCa2010,
  AUTHOR    = {Seddah, Djame and Chrupala, Grzegorz and Cetinoglu, Ozlem and van Genabith, Josef and Candito, Marie},
  TITLE     = {Lemmatization and Lexicalized Statistical Parsing of Morphologically Rich Languages: the Case of {French}},
  YEAR      = {2010},
  BOOKTITLE = {NAACL SPMRL 2010 workshop},
  NOTE      = {DK}
}

@InProceedings{SeReKuGe2010,
  AUTHOR    = {Seeker, Wolfgang and Rehbein, Ines and Kuhn, Jonas and van Genabith, Josef},
  TITLE     = {Hard Constraints for Grammatical Function Labelling},
  YEAR      = {2010},
  BOOKTITLE = {The 48th Annual Meeting of the Association for Computational Linguistics (ACL 2010). July 11-16, 2010, Uppsala, Sweden},
  NOTE      = {MP}
}

@Article{BurLom2014,
  AUTHOR  = {Burchardt, Aljoscha and Lommel, Arle Richard and Rehm, Georg and Sasaki, Felix and van Genabith, Josef and Uszkoreit, Hans},
  TITLE   = {Language Technology Drives Quality Translation},
  YEAR    = {2014},
  JOURNAL = {MultiLingual},
  VOLUME  = {143},
  PAGES   = {33--39}
}

@InProceedings{RehUsz2014,
  AUTHOR    = {Rehm, Georg and Uszkoreit, Hans and Ananiadou, Sophie and Bel, Núria
               and Bieleviciene, Audrone and Borin, Lars and Branco, António and Budin, Gerhard
               and Calzolari, Nicoletta and Daelemans, Walter and Garabik, Radovan
               and Grobelnik, Marko and García-Mateo, Carmen and van Genabith, Josef
               and Hajic, Jan and Hernáez, Inma and Judge, John and Koeva, Svetla
               and Krek, Simon and Krstev, Cvetana and Lindén, Krister and Magnini, Bernardo
               and Mariani, Joseph and McNaught, John and Melero, Maite and Monachini, Monica
               and Moreno, Asunción and Odijk, Jan and Ogrodniczuk, Maciej and Pezik, Piotr
               and Piperidis, Stelios and Przepiórkowski, Adam and Rögnvaldsson, Eirikur
               and Rosner, Michael and Pedersen, Bolette Sandford and Skadina, Inguna
               and De Smedt, Koenraad and Tadic, Marko and Thompson, Paul and Váradi, Tamás
               and Vasiljevs, Andrejs and Vider, Kadri and Zabarskaite, Jolanta},
  TITLE     = {The Strategic Impact of {META-NET} on the Regional, National and International Level},
  YEAR      = {2014},
  BOOKTITLE = {Proceedings of the 9th Language Resources and Evaluation Conference},
  ADDRESS   = {Reykjavik, Iceland}
}

@InProceedings{EspGen2017,
  AUTHOR    = {España-Bonet, Cristina and van Genabith, Josef},
  TITLE     = {Going beyond zero-shot {MT}: combining phonological, morphological and semantic factors. The {UdS-DFKI} System at {IWSLT} 2017},
  YEAR      = {2017},
  MONTH     = dec,
  BOOKTITLE = {Proceedings of the 14th International Workshop on Spoken Language Translation (IWSLT)},
  PAGES     = {15--22},
  ADDRESS   = {Tokyo, Japan},
  NOTE      = {LT}
}

@Article{EVBG2017,
  AUTHOR  = {España-Bonet, Cristina and Varga, Ádám Csaba and Barrón-Cedeño, Alberto and van Genabith, Josef},
  TITLE   = {An Empirical Analysis of {NMT}-Derived Interlingual Embeddings and their Use in Parallel Sentence Identification},
  YEAR    = {2017},
  JOURNAL = {IEEE Journal of Selected Topics in Signal Processing},
  VOLUME  = {11},
  NUMBER  = {8},
  PAGES   = {1340--1350},
  NOTE    = {LT}
}

@Article{KaDeGe2017,
  AUTHOR  = {Karakanta, Alina and Dehdari, Jon and van Genabith, Josef},
  TITLE   = {Neural machine translation for low-resource languages without parallel corpora},
  YEAR    = {2017},
  MONTH   = nov,
  JOURNAL = {Machine Translation},
  PAGES   = {1--23},
  NOTE    = {LT}
}

@InProceedings{HeiNeuGen2017,
  AUTHOR    = {Heigold, Georg and Neumann, Günter and van Genabith, Josef},
  TITLE     = {An Extensive Empirical Evaluation of Character-Based Morphological Tagging for 14 Languages},
  YEAR      = {2017},
  MONTH     = apr,
  BOOKTITLE = {Proceedings of the 15th European Chapter of the Association for Computational Linguistics (EACL)},
  PAGES     = {505--513},
  ADDRESS   = {Valencia, Spain},
  NOTE      = {LT}
}

@InProceedings{UszGabHen2017,
  AUTHOR    = {Uszkoreit, Hans and Gabryszak, Aleksandra and Hennig, Leonhard and Steffen, Jörg
               and Ai, Renlong and Busemann, Stephan and Dehdari, Jon and van Genabith, Josef
               and Heigold, Georg and Rethmeier, Nils and Rubino, Raphael and Schmeier, Sven
               and Thomas, Philippe and Wang, He and Xu, Feiyu},
  TITLE     = {{Common Round}: Application of Language Technologies to Large-Scale Web Debates},
  YEAR      = {2017},
  MONTH     = apr,
  BOOKTITLE = {Proceedings of the Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics},
  PAGES     = {5--8},
  ADDRESS   = {Valencia, Spain},
  NOTE      = {LT}
}