%
% GENERATED FROM https://www.coli.uni-saarland.de
% by : anonymous
% IP : coli2006.lst.uni-saarland.de
% at : Mon, 05 Feb 2024 15:42:15 +0100 GMT
%
% Selection : Author: Jakub_Piskorski
%
% Conventions used in this file:
%   - Non-ASCII letters are written as BibTeX special characters, e.g. {\"u},
%     so that classic 8-bit BibTeX sorts and labels names correctly.
%   - Words in titles that must keep their case are wrapped in braces.
%   - URL holds exactly one address; an additional address for the same
%     work is kept in the (style-ignored) ALTURL field.
%

@InProceedings{Abramowicz_Piskorski:2002,
  AUTHOR    = {Abramowicz, Witold and Piskorski, Jakub},
  TITLE     = {Information Extraction from Free Text Business Documents},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of IRMA - Information Resources Management Association International Conference, May 19-22},
  ADDRESS   = {Seattle},
  ABSTRACT  = {One of the most difficult aspects of using search technology is the process of
               getting information in shape for searching. The objective of this paper is an
               investigation of the applicability of information extraction techniques in
               real-world business applications dealing with textual data since business
               relevant data is mainly transmitted through free-text documents. Further, we
               demonstrate an enormous indexing potential of lightweight linguistic text
               processing techniques applied in information extraction systems in other closely
               related fields of information technology which concern processing vast amounts
               of textual data.}
}

@InProceedings{Becker_et_al:2002,
  AUTHOR    = {Becker, Markus and Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Xu, Feiyu},
  TITLE     = {{SProUT} -- Shallow Processing with Typed Feature Structures and Unification},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the International Conference on NLP (ICON 2002). December 18-21},
  ADDRESS   = {Mumbai, India},
  URL       = {http://www.dfki.de/~feiyu/sprout.pdf},
  ABSTRACT  = {We present SProUT, a platform for the development of multilingual shallow text
               processing systems. A grammar in SProUT consists of a set of rules, where the
               left-hand side is a regular expression over typed feature structures (TFSs),
               representing the recognition pattern, and the right-hand side is a sequence of
               TFSs, specifying how the output structure looks like. The reusable core
               components of SProUT are a finite-state machine toolkit, a regular compiler, a
               typed feature structure package, and a finite-state machine interpreter.},
  ANNOTE    = {COLIURL : Becker:2002:SSP.pdf}
}

@InProceedings{Crysmann_et_al:2002,
  AUTHOR    = {Crysmann, Berthold and Frank, Anette and Kiefer, Bernd and Krieger, Hans-Ulrich and M{\"u}ller, Stefan and Neumann, G{\"u}nter and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Siegel, Melanie and Uszkoreit, Hans and Xu, Feiyu},
  TITLE     = {An Integrated Architecture for Shallow and Deep Processing},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of ACL-2002, Association for Computational Linguistics 40th Anniversary Meeting, July 7-12},
  ADDRESS   = {Philadelphia, USA},
  URL       = {http://www.dfki.de/~feiyu/wb-acl02.pdf},
  ALTURL    = {http://www.dfki.de/~neumann/publications/new-ps/wb-acl02.pdf},
  ANNOTE    = {COLIURL : Crysmann:2002:IAS.pdf}
}

@Article{Neumann_Piskorski:2002,
  AUTHOR    = {Neumann, G{\"u}nter and Piskorski, Jakub},
  TITLE     = {A Shallow Text Processing Core Engine},
  YEAR      = {2002},
  JOURNAL   = {Journal of Computational Intelligence},
  VOLUME    = {18},
  NUMBER    = {3},
  URL       = {http://www.dfki.de/~neumann/publications/new-ps/comp-intell.pdf},
  NOTE      = {to appear},
  ANNOTE    = {COLIURL : Neumann:2002:STP.pdf}
}

@InProceedings{Neumann_et_al:2000,
  AUTHOR    = {Neumann, G{\"u}nter and Piskorski, Jakub and Braun, Christian},
  TITLE     = {An Intelligent Text Extraction and Navigation System},
  YEAR      = {2000},
  BOOKTITLE = {Proceedings of the 6th Applied Natural Language Processing Conference (ANLP'00). 1st Meeting of the North American Chapter of the Association for Computational Linguistics (NAACL'00), April 29 - May 4},
  PAGES     = {239--246},
  EDITOR    = {Nirenburg, Sergei and Appelt, Douglas and Ciravegna, Fabio and Dale, Robert},
  ADDRESS   = {Seattle, Washington, USA},
  PUBLISHER = {ACL},
  URL       = {ftp://lt-ftp.dfki.uni-sb.de/pub/papers/local/final-acl2000.ps.gz},
  ALTURL    = {ftp://lt-ftp.dfki.uni-sb.de/pub/papers/local/final-acl2000.entry},
  ANNOTE    = {COLIURL : Neumann:2000:ITE.pdf Neumann:2000:ITE.ps}
}

@TechReport{Piskorski:2002,
  AUTHOR      = {Piskorski, Jakub},
  TITLE       = {Finite-State Machine Toolkit},
  YEAR        = {2002},
  NUMBER      = {RR-02-04},
  ADDRESS     = {Saarbr{\"u}cken},
  TYPE        = {Technical Report},
  INSTITUTION = {DFKI},
  URL         = {http://www.dfki.de/dfkibib/publications/docs/Piskorski_2002_DFSMTK.pdf},
  ABSTRACT    = {Finite-state devices such as finite-state automata and finite-state transducers
                 have been known since the emergence of computer science and are recently
                 extensively used in many areas of language technology. The use of finite-state
                 devices is mainly motivated by their time and space efficiency. In this paper
                 we present the Finite-State Machine Toolkit for building, combining and
                 optimizing the finite-state machines, developed at the Language Technology Lab
                 of the German Research Center for Artificial Intelligence.},
  ANNOTE      = {COLIURL : Piskorski:2002:FSM.pdf}
}

@InProceedings{Piskorski_et_al:2002,
  AUTHOR    = {Piskorski, Jakub and Drozdzynski, Witold and Xu, Feiyu and Scherf, Oliver},
  TITLE     = {A Flexible {XML}-based Regular Compiler for Creation and Converting Linguistic Resources},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the 3rd International Conference on Language Resources and Evaluation (LREC'02)},
  ADDRESS   = {Las Palmas, Canary Islands, Spain},
  URL       = {http://www.dfki.de/~feiyu/regular_compiler.pdf},
  ABSTRACT  = {Finite-state devices are widely used to compactly model linguistic phenomena,
               whereas regular expressions are regarded as the adequate level of abstraction
               for thinking about finite-state languages. In this paper we present a flexible
               XML-based and Unicode-compatible regular compiler for creating, and integrating
               existing linguistic resources. Our tool provides user-friendly graphical
               interface which enables the transparent control of the compilation process and
               allows for testing generated finite-state grammars with several diagnostic
               tools. Through the direct database connection, existing linguistic resources can
               be converted into user-definable finite-state representations.},
  ANNOTE    = {COLIURL : Piskorski:2002:FXB.pdf}
}

@InProceedings{Piskorski_et_al:2002_1,
  AUTHOR    = {Piskorski, Jakub and Jaeger, Tilman and Xu, Feiyu},
  TITLE     = {A Framework for Domain and Task Adaptive Named-Entity Recognition},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the 5th International Baltic Conference on Databases and Information Systems, June 3-6},
  ADDRESS   = {Tallinn, Estonia},
  URL       = {http://www.dfki.de/~feiyu/balt.tar.gz},
  ABSTRACT  = {Robust Named-Entity Recognition software is an essential preprocessing tool for
               performing more complex text processing tasks in business information systems.
               In this paper we present a Framework for Domain and Task Adaptive Named-Entity
               Recognition. It consists of several clear-cut subcomponents which can be
               flexibly and variably combined together in order to construct a task-specific
               NE-Recognition tool. Additionally, a diagnostic tool for automatic prediction of
               best system configuration is provided, which speeds up the development cycle.},
  ANNOTE    = {COLIURL : Piskorski:2002:FDT.tar}
}

@InProceedings{Piskorski_Neumann:2000,
  AUTHOR    = {Piskorski, Jakub and Neumann, G{\"u}nter},
  TITLE     = {An Intelligent Text Extraction and Navigation System},
  YEAR      = {2000},
  BOOKTITLE = {Proceedings of the 6th International Conference on Computer-Assisted Information Retrieval (RIAO'00)},
  ADDRESS   = {Paris, France},
  URL       = {ftp://lt-ftp.dfki.uni-sb.de/pub/papers/local/sppc.ps.gz},
  ABSTRACT  = {We present SPPC, a high-performance system for intelligent text extraction and
               navigation from German free text documents. SPPC consists of a set of
               domain-independent shallow core components which are realized by means of
               cascaded weighted finite state machines and generic dynamic tries. All extracted
               information is represented uniformly in one data structure (called the text
               chart) in a highly compact and linked form in order to support indexing and
               navigation through the set of solutions. German text processing includes (among
               others) compound processing, high performance named entity recognition and chunk
               parsing based on a divide-and-conquer strategy. SPPC has a good performance
               (4380 words per second on standard PC environments) and high linguistic
               coverage.},
  ANNOTE    = {COLIURL : Piskorski:2000:ITE.pdf Piskorski:2000:ITE.ps}
}

@InProceedings{Piskorski_Skut:2000,
  AUTHOR    = {Piskorski, Jakub and Skut, Wojciech},
  TITLE     = {Intelligent Information Extraction},
  YEAR      = {2000},
  BOOKTITLE = {Proceedings of the 4th International Conference on Business Information Systems, April 24-25},
  ADDRESS   = {Poznan, Poland},
  ABSTRACT  = {New developments in Information Technology and an ever-growing amount of
               unstructured business text documents in digital form require intelligent tools
               for precisely determining their content and relevance. In this paper we give an
               overview of the natural language processing approach to information extraction
               and information retrieval. Our article contains a brief description of efficient
               linguistic core components.}
}

@InProceedings{Xu_et_al:2002,
  AUTHOR    = {Xu, Feiyu and Kurz, Daniela and Piskorski, Jakub and Schmeier, Sven},
  TITLE     = {Term Extraction and Mining Term Relations from Free-Text Documents in the Financial Domain},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the 5th International Conference on Business Information Systems (BIS'02), April 24-25},
  ADDRESS   = {Poznan, Poland},
  URL       = {http://www.dfki.de/~feiyu/Bis2002.pdf},
  ABSTRACT  = {In this paper, we present an approach to learning domain relevant terms
               automatically. We took the financial domain as our experiment domain.},
  NOTE      = {to appear},
  ANNOTE    = {COLIURL : Xu:2002:TEM.pdf}
}

@InProceedings{Xu_et_al:2002_1,
  AUTHOR    = {Xu, Feiyu and Kurz, Daniela and Piskorski, Jakub and Schmeier, Sven},
  TITLE     = {A Domain Adaptive Approach to Automatic Acquisition of Domain Relevant Terms and their Relations with Bootstrapping},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of the 3rd International Conference on Language Resources and Evaluation (LREC'02), May 29-31},
  ADDRESS   = {Las Palmas, Canary Islands, Spain},
  URL       = {http://www.dfki.uni-sb.de/~feiyu/LREC_TermExtraction_final.pdf},
  ABSTRACT  = {In this paper, we present an unsupervised hybrid text-mining approach to
               automatic acquisition of domain relevant terms and their relations. We deploy
               the TFIDF-based term classification method to acquire domain relevant
               single-word terms. Further, we apply two strategies in order to learn
               lexico-syntactic patterns which indicate paradigmatic and domain relevant
               syntagmatic relations between the extracted terms. The first one uses an
               existing ontology as initial knowledge for learning lexico-syntactic patterns,
               while the second is based on different collocation acquisition methods to deal
               with the free-word order languages like German. This domain-adaptive method
               yields good results even when trained on relatively small training corpora. It
               can be applied to different real-world applications, which need domain-relevant
               ontology, for example, information extraction, information retrieval or text
               classification.},
  ANNOTE    = {COLIURL : Xu:2002:DAA.pdf}
}

@InProceedings{Bering_et_al:2003,
  AUTHOR    = {Bering, Ch. and Drozdzynski, Witold and Erbach, Gregor and Guasch, C. and Homola, P. and Lehmann, Sabine and Li, Hong and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Shimada, Atsuko and Siegel, Melanie and Xu, Feiyu and Ziegler-Eisele, Dorothea},
  TITLE     = {Corpora and Evaluation Tools for Multilingual Named Entity Grammar Development},
  YEAR      = {2003},
  BOOKTITLE = {Proceedings of the Workshop on Multilingual Corpora, Corpus Linguistics Conference},
  EDITOR    = {Neumann, Stella and Hansen-Schirra, Silvia},
  ADDRESS   = {Lancaster}
}

@InProceedings{Busemann_et_al:2003,
  AUTHOR    = {Busemann, Stephan and Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Uszkoreit, Hans and Xu, Feiyu},
  TITLE     = {Integrating Information Extraction and Automatic Hyperlinking},
  YEAR      = {2003},
  BOOKTITLE = {Proceedings of ACL-2003, 41st Annual Meeting of the Association for Computational Linguistics},
  ADDRESS   = {Sapporo, Japan}
}

@InProceedings{Drozdzynski_et_al:2003,
  AUTHOR    = {Drozdzynski, Witold and Homola, P. and Piskorski, Jakub},
  TITLE     = {Adapting {SProUT} to processing {Baltic} and {Slavonic} Languages},
  YEAR      = {2003},
  BOOKTITLE = {Proceedings of IESL'03 Workshop held in conjunction with the RANLP Recent Advances in Natural Language Processing 2003 conference},
  ADDRESS   = {Borovets, Bulgaria}
}

% NOTE(review): the four entries below (keys ending in _1 to _4) are exact
% duplicates of Abramowicz_Piskorski:2002 at the top of this file. They are
% kept only so that documents citing those keys keep compiling; cite the
% unsuffixed key in new work and drop the copies once nothing refers to them.

@InProceedings{Abramowicz_Piskorski:2002_1,
  AUTHOR    = {Abramowicz, Witold and Piskorski, Jakub},
  TITLE     = {Information Extraction from Free Text Business Documents},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of IRMA - Information Resources Management Association International Conference, May 19-22},
  ADDRESS   = {Seattle},
  ABSTRACT  = {One of the most difficult aspects of using search technology is the process of
               getting information in shape for searching. The objective of this paper is an
               investigation of the applicability of information extraction techniques in
               real-world business applications dealing with textual data since business
               relevant data is mainly transmitted through free-text documents. Further, we
               demonstrate an enormous indexing potential of lightweight linguistic text
               processing techniques applied in information extraction systems in other closely
               related fields of information technology which concern processing vast amounts
               of textual data.}
}

@InProceedings{Abramowicz_Piskorski:2002_2,
  AUTHOR    = {Abramowicz, Witold and Piskorski, Jakub},
  TITLE     = {Information Extraction from Free Text Business Documents},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of IRMA - Information Resources Management Association International Conference, May 19-22},
  ADDRESS   = {Seattle},
  ABSTRACT  = {One of the most difficult aspects of using search technology is the process of
               getting information in shape for searching. The objective of this paper is an
               investigation of the applicability of information extraction techniques in
               real-world business applications dealing with textual data since business
               relevant data is mainly transmitted through free-text documents. Further, we
               demonstrate an enormous indexing potential of lightweight linguistic text
               processing techniques applied in information extraction systems in other closely
               related fields of information technology which concern processing vast amounts
               of textual data.}
}

@InProceedings{Abramowicz_Piskorski:2002_3,
  AUTHOR    = {Abramowicz, Witold and Piskorski, Jakub},
  TITLE     = {Information Extraction from Free Text Business Documents},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of IRMA - Information Resources Management Association International Conference, May 19-22},
  ADDRESS   = {Seattle},
  ABSTRACT  = {One of the most difficult aspects of using search technology is the process of
               getting information in shape for searching. The objective of this paper is an
               investigation of the applicability of information extraction techniques in
               real-world business applications dealing with textual data since business
               relevant data is mainly transmitted through free-text documents. Further, we
               demonstrate an enormous indexing potential of lightweight linguistic text
               processing techniques applied in information extraction systems in other closely
               related fields of information technology which concern processing vast amounts
               of textual data.}
}

@InProceedings{Abramowicz_Piskorski:2002_4,
  AUTHOR    = {Abramowicz, Witold and Piskorski, Jakub},
  TITLE     = {Information Extraction from Free Text Business Documents},
  YEAR      = {2002},
  BOOKTITLE = {Proceedings of IRMA - Information Resources Management Association International Conference, May 19-22},
  ADDRESS   = {Seattle},
  ABSTRACT  = {One of the most difficult aspects of using search technology is the process of
               getting information in shape for searching. The objective of this paper is an
               investigation of the applicability of information extraction techniques in
               real-world business applications dealing with textual data since business
               relevant data is mainly transmitted through free-text documents. Further, we
               demonstrate an enormous indexing potential of lightweight linguistic text
               processing techniques applied in information extraction systems in other closely
               related fields of information technology which concern processing vast amounts
               of textual data.}
}

% Chapter by its own authors inside an edited volume: InCollection is the
% classic BibTeX type that prints BOOKTITLE and EDITOR for such a chapter.

@InCollection{NeuSchm2013b,
  AUTHOR    = {Neumann, G{\"u}nter and Schmeier, Sven},
  TITLE     = {Interactive topic graph extraction and exploration of web content},
  YEAR      = {2013},
  BOOKTITLE = {Multi-source, multilingual information extraction and summarization},
  EDITOR    = {Poibeau, Thierry and Saggion, Horacio and Piskorski, Jakub and Yangarber, Roman},
  SERIES    = {Theory and Applications of Natural Language Processing},
  PUBLISHER = {Springer},
  CHAPTER   = {7},
  NOTE      = {HU}
}