% generated by bibtexbrowser
%
% Encoding: UTF-8

@inproceedings{beetz_agilo_2002,
  author = {M Beetz and S Buck and R Hanek and T Schmitt and B Radig},
  title = {The {AGILO} Autonomous Robot Soccer Team: Computational Principles, Experiences, and Perspectives},
  booktitle = {International Joint Conference on Autonomous Agents and Multi Agent Systems ({AAMAS}) 2002},
  year = {2002},
  pages = {805--812},
  address = {Bologna, Italy},
  abstract = {This paper describes the computational model underlying the {AGILO} autonomous robot soccer team, its implementation, and our experiences with it. The most salient aspects of the {AGILO} control software are that it includes (1) a cooperative probabilistic game state estimator working with a simple off-the-shelf camera system; (2) a situated action selection module that makes ample use of experience-based learning and produces coherent team behavior even if inter-robot communication is perturbed; and (3) a playbook executor that can perform preprogrammed complex soccer plays in appropriate situations by employing plan-based control techniques. The use of such sophisticated state estimation and control techniques distinguishes the {AGILO} software from many others applied to mid-size autonomous robot soccer. The paper discusses the computational techniques and necessary extensions based on experimental data from the 2001 robot soccer world championship.},
}

@article{beetz_learning_2010,
  author = {M Beetz and M Buss and B Radig},
  title = {Learning from Humans – Cognition-enabled Computational Models of Everyday Activity},
  journal = {Künstliche Intelligenz},
  year = {2010},
}

@article{beetz_agilo_2004,
  author = {M Beetz and T Schmitt and R Hanek and S Buck and F Stulp and D Schröter and B Radig},
  title = {The {AGILO} Robot Soccer Team – Experience-based Learning and Probabilistic Reasoning in Autonomous Robot Control},
  journal = {Autonomous Robots},
  year = {2004},
  volume = {17},
  number = {1},
  pages = {55--77},
  abstract = {This article describes the computational model underlying the {AGILO} autonomous robot soccer team, its implementation, and our experiences with it. According to our model, the control system of an autonomous soccer robot consists of a probabilistic game state estimator and a situated action selection module. The game state estimator computes the robot's belief state with respect to the current game situation using a simple off-the-shelf camera system. The estimated game state comprises the positions and dynamic states of the robot itself and its teammates as well as the positions of the ball and the opponent players. Employing sophisticated probabilistic reasoning techniques and exploiting the cooperation between teammates, the robot can estimate complex game states reliably and accurately despite incomplete and inaccurate state information. The action selection module selects actions according to specified selection criteria as well as learned experiences. Automatic learning techniques made it possible to develop fast and skillful routines for approaching the ball, assigning roles, and performing coordinated plays.
The paper discusses the computational techniques based on experimental data from the 2001 robot soccer world championship.},
}

@inproceedings{beetz_assistive_2008,
  author = {M Beetz and F Stulp and B Radig and J Bandouch and N Blodow and M Dolha and A Fedrizzi and D Jain and U Klank and I Kresse and A Maldonado and Z Marton and L Mösenlechner and F Ruiz and RB Rusu and M Tenorth},
  title = {The Assistive Kitchen – A Demonstration Scenario for Cognitive Technical Systems},
  booktitle = {{IEEE} 17th International Symposium on Robot and Human Interactive Communication ({RO-MAN}), München, Germany},
  year = {2008},
  pages = {1--8},
}

@inproceedings{bertelsmeier_kontextunterstutzte_1977,
  author = {R. Bertelsmeier and B Radig},
  title = {Kontextunterstützte Analyse von Szenen mit bewegten Objekten},
  booktitle = {Digitale Bildverarbeitung - Digital Image Processing, {GI/NTG} Fachtagung, München, 28.-30. März 1977},
  year = {1977},
  editor = {Nagel, Hans-Hellmut},
  pages = {101--128},
  publisher = {Springer},
  isbn = {3-540-08169-0},
}

@inproceedings{bigontina_pose_OGRW_2014,
  author = {A Bigontina and M Herrmann and M Hoernig and B Radig},
  title = {Human Body Part Classification in Monocular Soccer Images},
  booktitle = {9th Open German-Russian Workshop on Pattern Recognition and Image Understanding},
  year = {2014},
  address = {Koblenz},
  month = {dec},
  keywords = {Articulated Pose Estimation, Human Body Pose Estimation, Pixel-based Classification, Random Forests, soccer},
}

@techreport{brscic_multi_2010,
  author = {D. Brščić and M Eggers and F. Rohrmüller and O. Kourakos and S. Sosnowski and D. Althoff and M. Lawitzky and A. Mörtl and M. R. and V. Koropouli and J. R. Medina Hernández and X. Zang and W. Wang and D. Wollherr and K. Kühnlenz and C Mayer and T. Kruse and A. Kirsch and J. Blume and A. Bannat and T. Rehrl and F. Wallhoff and T. Lorenz and P. Basili and C. Lenz and T. Röder and G. Panin and W. Maier and S. Hirche and M. Buss and M. Beetz and B Radig and A. Schubö and S. Glasauer and A. Knoll and E. Steinbach},
  title = {Multi Joint Action in {CoTeSys} — Setup and Challenges},
  institution = {{CoTeSys} Cluster of Excellence: Technische Universität München \& Ludwig-Maximilians-Universität München},
  year = {2010},
  number = {{CoTeSys-TR-10-01}},
  address = {Munich, Germany},
  month = {jun},
}

@article{eggers_setup_2013,
  author = {M Eggers and V Dikov and C Mayer and C Steger and B Radig},
  title = {Setup and calibration of a distributed camera system for surveillance of laboratory space},
  journal = {Pattern Recognition and Image Analysis},
  year = {2013},
  volume = {23},
  number = {4},
  pages = {481--487},
  month = {oct},
  doi = {10.1134/S1054661813040032},
  issn = {1054-6618, 1555-6212},
  language = {en},
  url = {http://link.springer.com/10.1134/S1054661813040032},
  urldate = {2014-05-15},
}

@inproceedings{gast_did_2009,
  author = {J Gast and A Bannat and T Rehrl and C Mayer and F Wallhoff and G Rigoll and B Radig},
  title = {Did I Get it Right: Head Gesture Analysis for Human-Machine Interaction},
  booktitle = {Human-Computer Interaction.
Novel Interaction Methods and Techniques},
  year = {2009},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer},
}

@inproceedings{gonsior_improving_2011,
  author = {B Gonsior and S Sosnowski and C Mayer and J Blume and B Radig and D Wollherr and K Kühnlenz},
  title = {Improving Aspects of Empathy and Subjective Performance for {HRI} through Mirroring Facial Expressions},
  booktitle = {Proceedings of the 19th {IEEE} International Symposium on Robot and Human Interactive Communication},
  year = {2011},
  keywords = {facial expressions},
}

@article{herrmmic_tracking_2014,
  author = {M Herrmann and M Hoernig and B Radig},
  title = {Online Multi-player Tracking in Monocular Soccer Videos},
  journal = {AASRI Procedia},
  year = {2014},
  volume = {8},
  pages = {30--37},
  issn = {2212-6716},
  keywords = {computer vision; soccer},
  url = {http://www.sciencedirect.com/science/article/pii/S2212671614000730},
}

@article{herrmann_automatic_2014,
  author = {M Herrmann and C Mayer and B Radig},
  title = {Automatic Generation of Image Analysis Programs},
  journal = {Pattern Recognition and Image Analysis},
  year = {2014},
  volume = {24},
  number = {3},
  pages = {400--408},
  doi = {10.1134/S1054661814030079},
  issn = {1054-6618},
  keywords = {automatic programming; inductive programming; generate-and-search; machine learning; computer vision; image analysis; object detection},
  language = {English},
  publisher = {Pleiades Publishing},
  url = {http://dx.doi.org/10.1134/S1054661814030079},
}

@inproceedings{herrmann_automatic_2013,
  author = {M Herrmann and C Mayer and B Radig},
  title = {Automatic Generation of Image Analysis Programs},
  booktitle = {11th International Conference on Pattern Recognition and Image Analysis ({PRIA-11-2013})},
  year = {2013},
  volume = {1},
  pages = {36--39},
  address = {Samara},
  month = {sep},
  publisher = {The Russian Academy of Sciences},
  keywords = {automatic programming; inductive programming; generate-and-search; machine learning; computer vision; image analysis; object detection},
}

@article{OJWT-v1i2n01_Hoernig,
  author = {M Hoernig and A Bigontina and B Radig},
  title = {A Comparative Evaluation of Current HTML5 Web Video Implementations},
  journal = {Open Journal of Web Technologies (OJWT)},
  year = {2014},
  volume = {1},
  number = {2},
  pages = {1--9},
  issn = {2199-188X},
  publisher = {RonPub UG (haftungsbeschr{\"a}nkt)},
  url = {http://www.ronpub.com/publications/OJWT-v1i2n01_Hoernig.pdf},
}

@article{hoernig_real-time_2014,
  author = {M Hoernig and M Herrmann and B Radig},
  title = {Real-Time Segmentation Methods for Monocular Soccer Videos},
  journal = {Pattern Recognition and Image Analysis},
  year = {2015},
  note = {To appear},
  keywords = {soccer},
}

@inproceedings{hoernig_shot_detection_2014,
  author = {M Hoernig and M Herrmann and B Radig},
  title = {Multi Temporal Distance Images for Shot Detection in Soccer Games},
  booktitle = {22nd European Signal Processing Conference ({EUSIPCO} 2014)},
  year = {2014},
  address = {Lisbon, Portugal},
  month = {sep},
  abstract = {We present a new approach for video shot detection and introduce multi temporal distance images (MTDIs), formed by chi-square based similarity measures that are calculated pairwise within a floating window of video frames. By using MTDI-based boundary detectors, various cuts and transitions in various shapes (dissolves, overlaid effects, fades, and others) can be determined.
The algorithm has been developed within the special context of soccer game TV broadcasts, where a particular interest in long view shots is intrinsic. With a correct shot detection rate in camera 1 shots of 98.2\% within our representative test data set, our system outperforms competing state-of-the-art systems.},
  keywords = {soccer video analysis; video indexing; multi temporal distance image (MTDI); video segmentation; video shot boundary detection; soccer},
}

@inproceedings{hoernig_real_2013,
  author = {M Hoernig and M Herrmann and B Radig},
  title = {Real Time Soccer Field Analysis from Monocular {TV} Video Data},
  booktitle = {11th International Conference on Pattern Recognition and Image Analysis ({PRIA-11-2013})},
  year = {2013},
  volume = {2},
  pages = {567--570},
  address = {Samara},
  month = {sep},
  publisher = {The Russian Academy of Sciences},
  keywords = {soccer},
}

@inproceedings{hammerle_sensor-based_2005,
  author = {S Hämmerle and M Wimmer and B Radig and M Beetz},
  title = {Sensor-based Situated, Individualized, and Personalized Interaction in Smart Environments},
  booktitle = {{INFORMATIK} 2005 - Informatik {LIVE!} Band 1, Beiträge der 35. Jahrestagung der Gesellschaft für Informatik ({GI})},
  year = {2005},
  editor = {Cremers, Armin B. and Manthey, Rainer and Martini, Peter and Steinhage, Volker},
  volume = {67},
  series = {{LNI}},
  pages = {261--265},
  address = {Bonn, Germany},
  month = {sep},
  publisher = {{GI}},
  abstract = {Smart environments are sensor-equipped areas that know about their environment and are thus able to adapt to the user. We present {sHOME}, a multiagent-based platform for integrating situated, individualized, and personalized information. {sHOME} acquires sensor data to determine the user's identity, location, gestures, and natural language commands, and stores it in a central knowledge base.},
  isbn = {3-88579-396-2},
}

@inproceedings{lenz_distributed_2010,
  author = {C. Lenz and T. Röder and M Eggers and S. Amin and T. Kisler and B Radig and G. Panin and A. Knoll},
  title = {A Distributed Many-Camera System for Multi-Person Tracking},
  booktitle = {Proceedings of the First International Joint Conference on Ambient Intelligence ({AmI} 2010)},
  year = {2010},
  editor = {Wichert, R. and Ruyter, B.
de},
  month = {nov},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer},
}

@article{mayer_cross-database_2014,
  author = {C Mayer and M Eggers and B Radig},
  title = {Cross-database evaluation for facial expression recognition},
  journal = {Pattern Recognition and Image Analysis},
  year = {2014},
  volume = {24},
  number = {1},
  pages = {124--132},
  month = {jan},
  doi = {10.1134/S1054661814010106},
  issn = {1054-6618, 1555-6212},
  keywords = {facial expressions},
  language = {en},
  url = {http://link.springer.com/10.1134/S1054661814010106},
  urldate = {2014-05-15},
}

@article{mayer_face_2013,
  author = {C Mayer and B Radig},
  title = {Face model fitting with learned displacement experts and multi-band images},
  journal = {Pattern Recognition and Image Analysis},
  year = {2013},
  volume = {23},
  number = {2},
  pages = {287--295},
  month = {apr},
  doi = {10.1134/S1054661813020119},
  issn = {1054-6618, 1555-6212},
  keywords = {facial expressions},
  language = {en},
  url = {http://link.springer.com/10.1134/S1054661813020119},
  urldate = {2014-05-15},
}

@article{mayer_face_2011,
  author = {C Mayer and B Radig},
  title = {Face model fitting with learned displacement experts and multi-band images},
  journal = {Pattern Recognition and Image Analysis},
  year = {2011},
  volume = {21},
  number = {3},
  pages = {526--529},
  month = {sep},
  doi = {10.1134/S1054661811020738},
  issn = {1054-6618, 1555-6212},
  keywords = {facial expressions},
  language = {en},
  url = {http://link.springer.com/10.1134/S1054661811020738},
  urldate = {2014-05-15},
}

@inproceedings{mayer_learning_2011,
  author = {C Mayer and B Radig},
  title = {Learning Displacement Experts from Multi-band Images for Face Model Fitting},
  booktitle = {International Conference on Advances in Computer-Human Interaction},
  year = {2011},
  month = {feb},
  keywords = {facial expressions},
}

@inproceedings{mayer_towards_2010,
  author = {C Mayer and S Sosnowski and K Kühnlenz and B Radig},
  title = {Towards robotic facial mimicry: system development and evaluation},
  booktitle = {Proceedings of the 19th {IEEE} International Symposium on Robot and Human Interactive Communication},
  year = {2010},
  keywords = {facial expressions},
}

@inproceedings{mayer_facial_2009,
  author = {C Mayer and M Wimmer and M Eggers and B Radig},
  title = {Facial Expression Recognition with {3D} Deformable Models},
  booktitle = {Proceedings of the 2nd International Conference on Advances in Computer-Human Interaction ({ACHI})},
  year = {2009},
  publisher = {Springer},
  keywords = {facial expressions},
}

@article{mayer_adjusted_2009,
  author = {C Mayer and M Wimmer and B Radig},
  title = {Adjusted Pixel Features for Facial Component Classification},
  journal = {Image and Vision Computing Journal},
  year = {2009},
  keywords = {facial expressions},
}

@inproceedings{mayer_interpreting_2008,
  author = {C Mayer and M Wimmer and F Stulp and Z Riaz and A Roth and M Eggers and B Radig},
  title = {Interpreting the Dynamics of Facial Expressions in Real Time Using Model-based Techniques},
  booktitle = {Proceedings of the 3rd Workshop on Emotion and Computing: Current Research and Future Impact},
  year = {2008},
  pages = {45--46},
  address = {Kaiserslautern, Germany},
  month = {sep},
  keywords = {facial expressions},
}

@inproceedings{mayer_real_2008,
  author = {C Mayer and M Wimmer and F Stulp and Z Riaz and A Roth and M Eggers and B Radig},
  title = {A Real Time System for Model-based Interpretation of the Dynamics of Facial Expressions},
  booktitle = {Proc.
of the International Conference on Automatic Face and Gesture Recognition ({FGR08})},
  year = {2008},
  address = {Amsterdam, Netherlands},
  month = {sep},
  keywords = {facial expressions},
}

@inproceedings{pietzsch_face_2008,
  author = {S Pietzsch and M Wimmer and F Stulp and B Radig},
  title = {Face Model Fitting with Generic, Group-specific, and Person-specific Objective Functions},
  booktitle = {3rd International Conference on Computer Vision Theory and Applications ({VISAPP})},
  year = {2008},
  volume = {2},
  pages = {5--12},
  address = {Madeira, Portugal},
  month = {jan},
  abstract = {In model-based fitting, the model parameters that best fit the image are determined by searching for the optimum of an objective function. Often, this function is designed manually, based on implicit and domain-dependent knowledge. We acquire more robust objective functions by learning them from annotated images; many critical decisions are automated, and the remaining manual steps do not require domain knowledge. Still, the trade-off between generality and accuracy remains. General functions can be applied to a large range of objects, whereas specific functions describe a subset of objects more accurately. Gross et al. have demonstrated this principle by comparing generic to person-specific Active Appearance Models. As it is impossible to learn a person-specific objective function for the entire human population, we automatically partition the training images and then learn partition-specific functions. The number of groups influences the specificity of the learned functions. We automatically determine the optimal partitioning given the number of groups by minimizing the expected fitting error. Our empirical evaluation demonstrates that the group-specific objective functions more accurately describe the images of the corresponding group.
The results of this paper are especially relevant to face model tracking, as individual faces will not change throughout an image sequence.},
  keywords = {facial expressions},
}

@article{radig_perception_2011,
  author = {B Radig and C Mayer},
  title = {Perception as a key component for cognitive technical systems},
  journal = {Pattern Recognition and Image Analysis},
  year = {2011},
  volume = {21},
  number = {2},
  pages = {160--163},
  month = {jun},
  doi = {10.1134/S1054661811020921},
  issn = {1054-6618, 1555-6212},
  language = {en},
  url = {http://link.springer.com/10.1134/S1054661811020921},
  urldate = {2014-05-15},
}

@inproceedings{riaz_image_2009,
  author = {Z Riaz and M Beetz and B Radig},
  title = {Image Normalization for Face Recognition using {3D} Model},
  booktitle = {International Conference of Information and Communication Technologies, Karachi, Pakistan},
  year = {2009},
  publisher = {{IEEE}},
  keywords = {facial expressions},
}

@inproceedings{riaz_shape_2008,
  author = {Z Riaz and M Beetz and B Radig},
  title = {Shape Invariant Recognition of Segmented Human Faces using Eigenfaces},
  booktitle = {Proceedings of the 12th International Multitopic Conference},
  year = {2008},
  publisher = {{IEEE}},
  keywords = {facial expressions},
}

@inproceedings{riaz_unified_2009,
  author = {Z Riaz and S Gedikli and M Beetz and B Radig},
  title = {A Unified Features Approach to Human Face Image Analysis and Interpretation},
  booktitle = {Affective Computing and Intelligent Interaction, Amsterdam, Netherlands},
  year = {2009},
  publisher = {{IEEE}},
  keywords = {facial expressions},
}

@inproceedings{riaz_3d_2009,
  author = {Z Riaz and C Mayer and M Beetz and B Radig},
  title = {{3D} Model for Face Recognition across Facial Expressions},
  booktitle = {Biometric {ID} Management and Multimodal Communication, Madrid, Spain},
  year = {2009},
  publisher = {Springer},
  keywords = {facial expressions},
}

@inproceedings{riaz_facial_2009,
  author = {Z Riaz and C Mayer and M Beetz and B Radig},
  title = {Facial Expressions Recognition from Image Sequences},
  booktitle = {2nd International Conference on Cross-Modal Analysis of Speech, Gestures, Gaze and Facial Expressions, Prague, Czech Republic},
  year = {2009},
  publisher = {Springer},
  keywords = {facial expressions},
}

@inproceedings{riaz_model_2009-1,
  author = {Z Riaz and C Mayer and M Beetz and B Radig},
  title = {Model Based Analysis of Face Images for Facial Feature Extraction},
  booktitle = {Computer Analysis of Images and Patterns, Münster, Germany},
  year = {2009},
  publisher = {Springer},
  keywords = {facial expressions},
}

@inproceedings{riaz_model_2009,
  author = {Z Riaz and C Mayer and M Wimmer and M Beetz and B Radig},
  title = {A Model Based Approach for Expression Invariant Face Recognition},
  booktitle = {3rd International Conference on Biometrics, Alghero, Italy},
  year = {2009},
  publisher = {Springer},
  keywords = {facial expressions},
}

@article{riaz_model_2008,
  author = {Z Riaz and C Mayer and M Wimmer and B Radig},
  title = {Model Based Face Recognition Across Facial Expressions},
  journal = {Journal of Information and Communication Technology},
  year = {2008},
  month = {dec},
  keywords = {facial expressions},
}

@article{schmitt_cooperative_2002,
  author = {T Schmitt and R Hanek and M Beetz and S Buck and B Radig},
  title = {Cooperative Probabilistic State Estimation for Vision-based Autonomous Mobile Robots},
  journal = {{IEEE} Transactions on Robotics and Automation},
  year = {2002},
  volume = {18},
  number = {5},
  month = {oct},
  abstract = {With the services that autonomous robots are to
provide becoming more demanding, the states that the robots have to estimate become more complex. In this article, we develop and analyze a probabilistic, vision-based state estimation method for individual, autonomous robots. This method enables a team of mobile robots to estimate their joint positions in a known environment and track the positions of autonomously moving objects. The state estimators of different robots cooperate to increase the accuracy and reliability of the estimation process. This cooperation between the robots enables them to track temporarily occluded objects and to recover their position faster after they have lost track of it. The method is empirically validated based on experiments with a team of physical robots.},
}

@inproceedings{schroter_detection_2004,
  author = {D Schröter and T. Weber and M Beetz and B Radig},
  title = {Detection and Classification of Gateways for the Acquisition of Structured Robot Maps},
  booktitle = {Proc. of 26th Pattern Recognition Symposium ({DAGM}), {Tübingen/Germany}},
  year = {2004},
  abstract = {The automatic acquisition of structured object maps requires sophisticated perceptual mechanisms that enable the robot to recognize the objects that are to be stored in the robot map. This paper investigates a particular object recognition problem: the automatic detection and classification of gateways in office environments based on laser range data. We propose, discuss, and empirically evaluate a sensor model for crossing gateways and different approaches to gateway classification, including simple maximum classifiers and {HMM}-based classification of observation sequences.},
}

@inproceedings{schuller_audiovisual_2007,
  author = {B Schuller and M Wimmer and D Arsic and G Rigoll and B Radig},
  title = {Audiovisual Behavior Modeling by Combined Feature Spaces},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  year = {2007},
  volume = {2},
  pages = {733--736},
  address = {Honolulu, Hawaii, {USA}},
  month = {apr},
  isbn = {1-4244-0728-1},
}

@inproceedings{sosnowski_mirror_2010,
  author = {S Sosnowski and C Mayer and K Kühnlenz and B Radig},
  title = {Mirror my emotions! Combining facial expression analysis and synthesis on a robot},
  booktitle = {The Thirty Sixth Annual Convention of the Society for the Study of Artificial Intelligence and Simulation of Behaviour ({AISB2010})},
  year = {2010},
  keywords = {facial expressions},
}

@inproceedings{wallhoff_real-time_2010,
  author = {F Wallhoff and T Rehrl and C Mayer and B Radig},
  title = {Real-Time Face and Gesture Analysis for Human-Robot Interaction},
  booktitle = {Real-Time Image and Video Processing 2010},
  year = {2010},
  series = {Proceedings of {SPIE}},
  keywords = {facial expressions},
}

@inproceedings{wimmer_asm_2008,
  author = {M Wimmer and S Fujie and F Stulp and T Kobayashi and B Radig},
  title = {An {ASM} Fitting Method Based on Machine Learning that Provides a Robust Parameter Initialization for {AAM} Fitting},
  booktitle = {Proc. of the International Conference on Automatic Face and Gesture Recognition ({FGR08})},
  year = {2008},
  address = {Amsterdam, Netherlands},
  month = {sep},
  abstract = {Due to their use of information contained in texture, Active Appearance Models ({AAM}) generally outperform Active Shape Models ({ASM}) in terms of fitting accuracy.
Although many extensions and improvements over the original {AAM} have been proposed, one of the main drawbacks of {AAMs} remains their dependence on good initial model parameters to achieve accurate fitting results. In this paper, we determine the initial model parameters for {AAM} fitting with {ASM} fitting, and use machine learning techniques to improve the scope and accuracy of {ASM} fitting. Combining the precision of {AAM} fitting with the large radius of convergence of learned {ASM} fitting improves the results by an order of magnitude, as our empirical evaluation on a database of publicly available benchmark images demonstrates.},
}

@inproceedings{wimmer_are_2008,
  author = {M Wimmer and C Mayer and M Eggers and B Radig},
  title = {Are You Happy with Your First Name?},
  booktitle = {Proceedings of the 3rd Workshop on Emotion and Computing: Current Research and Future Impact},
  year = {2008},
  pages = {23--29},
  address = {Kaiserslautern, Germany},
  month = {sep},
}

@inproceedings{wimmer_tailoring_2008,
  author = {M Wimmer and C Mayer and S Pietzsch and B Radig},
  title = {Tailoring Model-based Techniques for Facial Expression Interpretation},
  booktitle = {The First International Conference on Advances in Computer-Human Interaction ({ACHI08})},
  year = {2008},
  address = {Sainte Luce, Martinique},
  month = {feb},
  keywords = {facial expressions},
}

@inproceedings{wimmer_recognizing_2008,
  author = {M Wimmer and C Mayer and B Radig},
  title = {Recognizing Facial Expressions Using Model-based Image Interpretation},
  booktitle = {Verbal and Nonverbal Communication Behaviours, {COST} Action 2102 International Workshop},
  year = {2008},
  address = {Vietri sul Mare, Italy},
  month = {apr},
  abstract = {Even though electronic devices widely occupy our daily lives, human-machine interaction still lacks intuition. Researchers therefore intend to resolve these shortcomings by augmenting traditional systems with aspects of human-human interaction, considering human emotion, behavior, and intention. This publication focuses on one aspect of this challenge: recognizing facial expressions. Our approach achieves real-time performance and provides robustness for real-world applicability. This computer vision task comprises various phases, for which it exploits model-based techniques that accurately localize facial features, seamlessly track them through image sequences, and finally infer the facial expressions visible. We specifically adapt state-of-the-art techniques to each of these challenging phases. Our system has been successfully presented to industrial, political, and scientific audiences at various events.},
  keywords = {facial expressions},
}

@inproceedings{wimmer_robustly_2008,
  author = {M Wimmer and C Mayer and B Radig},
  title = {Robustly Classifying Facial Components Using a Set of Adjusted Pixel Features},
  booktitle = {Proc. of the International Conference on Face and Gesture Recognition ({FGR08})},
  year = {2008},
  address = {Amsterdam, Netherlands},
  month = {sep},
  abstract = {Efficient and accurate localization of the components of human faces, such as skin, lips, eyes, and brows, provides benefit to various real-world applications. However, high intra-class and small inter-class variations in color prevent simple but quick pixel classifiers from yielding robust results. In contrast, more elaborate classifiers consider shape or region features, but they do not achieve real-time performance.
In this paper, we show that it is indeed possible to robustly determine the facial components and achieve far more than real-time performance. We use quick pixel-level classifiers and provide them with a set of pixel features that are adapted to the image characteristics beforehand. We do not manually select the pixel features and specify the calculation rules. Instead, our idea is to provide a multitude of features and let the machine learning algorithm decide which of them are important. The evaluation draws a comparison to fixed approaches that do not adapt the computation of the features to the image content in any way. The obtained accuracy is precise enough for real-world applications such as model-based interpretation of human faces.},
  keywords = {facial expressions},
}

@inproceedings{wimmer_face_2008,
  author = {M Wimmer and C Mayer and F Stulp and B Radig},
  title = {Face Model Fitting based on Machine Learning from Multi-band Images of Facial Components},
  booktitle = {Workshop on Non-Rigid Shape Analysis and Deformable Image Alignment, held in conjunction with {CVPR}},
  year = {2008},
  address = {Anchorage, {AK}, {USA}},
  month = {jun},
  abstract = {Geometric models allow us to determine semantic information about real-world objects. Model fitting algorithms need to find the best match between a parameterized model and a given image. This task inherently requires an objective function to estimate the error between a model parameterization and an image. The accuracy of this function directly influences the accuracy of the entire process of model fitting. Unfortunately, building these functions is a non-trivial task. Dedicated to the application of face model fitting, this paper proposes to consider a multi-band image representation that indicates the facial components, from which a large set of image features is computed. Since it is not possible to manually formulate an objective function that considers this large amount of features, we apply a machine learning framework to construct them. This automatic approach is capable of considering the large amount of features provided and yields highly accurate objective functions for face model fitting. Since the machine learning framework rejects non-relevant image features, we obtain high-performance runtime characteristics as well.},
  keywords = {facial expressions},
}

@inproceedings{wimmer_estimating_2007,
  author = {M Wimmer and C Mayer and F Stulp and B Radig},
  title = {Estimating Natural Activity by Fitting {3D} Models via Learned Objective Functions},
  booktitle = {Workshop on Vision, Modeling, and Visualization ({VMV})},
  year = {2007},
  volume = {1},
  pages = {233--241},
  address = {Saarbrücken, Germany},
  month = {nov},
  abstract = {Model-based image interpretation has proven to robustly extract high-level scene descriptors from raw image data. Furthermore, geometric texture models represent a fundamental component for visualizing real-world scenarios. However, the motion of the model and the real-world object must be similar in order to portray natural activity. Again, this information can be determined by inspecting images via model-based image interpretation. This paper sketches the challenge of fitting models to images, describes the shortcomings of current approaches, and proposes a technique based on machine learning. We identify the objective function as a crucial component for fitting models to images.
Furthermore, we state preferable properties of these functions, and we propose to learn such a function from manually annotated example images.},
}

@inproceedings{wimmer_robustly_2008-1,
  author = {M Wimmer and S Pietzsch and C Mayer and B Radig},
  title = {Robustly Estimating the Color of Facial Components Using a Set of Adjusted Pixel Features},
  booktitle = {14. Workshop Farbbildverarbeitung},
  year = {2008},
  pages = {85--96},
  address = {Aachen, Germany},
  month = {oct},
  keywords = {facial expressions},
}

@inproceedings{wimmer_learning_2007,
  author = {M Wimmer and S Pietzsch and F Stulp and B Radig},
  title = {Learning Robust Objective Functions with Application to Face Model Fitting},
  booktitle = {Proceedings of the 29th {DAGM} Symposium},
  year = {2007},
  volume = {1},
  pages = {486--496},
  address = {Heidelberg, Germany},
  month = {sep},
  abstract = {Model-based image interpretation extracts high-level information from images using a priori knowledge about the object of interest. The computational challenge is to determine the model parameters that best match a given image by searching for the global optimum of the involved objective function. Unfortunately, this function is usually designed manually, based on implicit and domain-dependent knowledge, which prevents the fitting task from yielding accurate results. In this paper, we demonstrate how to improve model fitting by learning objective functions from annotated training images. Our approach automates many critical decisions, and the remaining manual steps hardly require domain-dependent knowledge. This yields more robust objective functions that achieve an accurate model fit. Our evaluation uses a publicly available image database and compares the obtained results to a recent state-of-the-art approach.},
  keywords = {facial expressions},
}

@inproceedings{wimmer_adaptive_2005,
  author = {M Wimmer and B Radig},
  title = {Adaptive Skin Color Classificator},
  booktitle = {Proceedings of the first International Conference on Graphics, Vision and Image Processing},
  year = {2005},
  editor = {Aboshosha, Ashraf and others},
  volume = {I},
  pages = {324--327},
  address = {Cairo, Egypt},
  month = {dec},
  publisher = {{ICGST}},
  abstract = {A lot of computer vision applications benefit from robust skin color classification. But this is a hard challenge due to various image conditions like camera settings, illumination, light source, shadows, and many more. Furthermore, people's tans and ethnic groups also extend those conditions. In this work we present a parametric skin color classifier that can be adapted to the conditions of each image or image sequence. This is done by evaluating some previously known skin color pixels, which are acquired by applying a face detector. This approach can distinguish skin color from very similar colors like lip color or eyebrow color.
Its high speed and high accuracy make it appropriate for real-time applications such as face tracking and facial expression recognition.},
  isbn = {21970/2005},
}

@inproceedings{wimmer_initial_2007,
  author = {M Wimmer and B Radig},
  title = {Initial Pose Estimation for {3D} Models Using Learned Objective Functions},
  booktitle = {Proceedings of the 8th Asian Conference on Computer Vision ({ACCV07})},
  year = {2007},
  editor = {Yagi, Yasushi and Kang, Sing Bing and Kweon, In So and Zha, Hongbin},
  volume = {4844},
  series = {{LNCS}},
  pages = {332--341},
  address = {Heidelberg},
  month = {nov},
  publisher = {Springer},
  abstract = {Tracking {3D} models in image sequences essentially requires determining their initial position and orientation. Our previous work identifies the objective function as a crucial component for fitting {2D} models to images. We state preferable properties of these functions and propose to learn such a function from annotated example images. This paper extends this approach by making it appropriate to also fit {3D} models to images. The correctly fitted model represents the initial pose for model tracking. However, this extension induces non-trivial challenges, such as out-of-plane rotations and self-occlusion, which cause large variation in the model's surface visible in the image. We solve this issue by connecting the input features of the objective function directly to the model. Furthermore, sequentially executing objective functions specifically learned for different displacements from the correct positions yields highly accurate objective values.},
  isbn = {978-3-540-76389-5},
}

@inproceedings{wimmer_automatically_2007,
  author = {M Wimmer and B Radig},
  title = {Automatically Learning the Objective Function for Model Fitting},
  booktitle = {Proceedings of the Meeting on Image Recognition and Understanding ({MIRU})},
  year = {2007},
  address = {Hiroshima, Japan},
  month = {jul},
  abstract = {Model-based image interpretation has proven to appropriately extract high-level information from images. A priori knowledge about the object of interest represents the basis of this task. Model fitting determines the model that best matches a given image by searching for the global optimum of an objective function. Unfortunately, the objective function is usually designed manually, based on implicit and domain-dependent knowledge. In contrast, this paper describes how to obtain highly accurate objective functions by learning them from annotated training images. It automates many critical decisions, and the remaining manual steps hardly require domain-dependent knowledge at all. This approach yields highly accurate objective functions. Our evaluation fits a face model to a publicly available image database and compares the obtained results to a recent state-of-the-art approach.},
}

@article{wimmer_adaptive_2006,
  author = {M Wimmer and B Radig},
  title = {Adaptive Skin Color Classificator},
  journal = {{ICGST} International Journal on Graphics, Vision and Image Processing},
  year = {2006},
  volume = {Special Issue on Biometrics},
  abstract = {Skin color is an important feature of faces. Various applications benefit from robust skin color detection. Skin color may look quite different depending on camera settings, illumination, shadows, people's tans, and ethnic groups. That variation is a challenging aspect of skin color classification. In this paper, we present an approach that uses a high-level vision module to detect an image-specific skin color model.
This model is representative for the context conditions within the image and is used to adapt dynamic skin color classifiers to it. This approach distinguishes skin color from very similar colors like lip color or eyebrow color. Its high speed and accuracy make it appropriate for real-time applications such as face model fitting, gaze estimation, and recognition of facial expressions.},
}

@inproceedings{wimmer_person_2006,
  author = {M Wimmer and B Radig and M Beetz},
  title = {A Person and Context Specific Approach for Skin Color Classification},
  booktitle = {Proceedings of the 18th International Conference on Pattern Recognition ({ICPR} 2006)},
  year = {2006},
  volume = {2},
  pages = {39--42},
  address = {Los Alamitos, {CA}, {USA}},
  month = {aug},
  publisher = {{IEEE} Computer Society},
  abstract = {Skin color is an important feature of faces. Various applications benefit from robust skin color detection. Depending on camera settings, illumination, shadows, people's tans, and ethnic groups, skin color looks different, which makes detecting it automatically a challenging task. In this paper, we present an approach that uses a high-level vision module to detect an image-specific skin color model. This model is then used to adapt parametric skin color classifiers to the processed image. This approach is capable of distinguishing skin color from extremely similar colors, such as lip color or eyebrow color. Its high speed and high accuracy make it appropriate for real-time applications such as face tracking and recognition of facial expressions.},
}

@inproceedings{wimmer_sipbild_2007,
  author = {M Wimmer and B Radig and C Mayer},
  title = {{SIPBILD} – Mimik- und Gestikerkennung in der Mensch-Maschine-Schnittstelle},
  booktitle = {Beiträge der 37. Jahrestagung der Gesellschaft für Informatik ({GI})},
  year = {2007},
  volume = {1},
  pages = {271--274},
  address = {Bremen, Germany},
  month = {sep},
  abstract = {The interpretation of visual information plays a central role in natural human-machine interaction. The lack of control over environmental conditions such as brightness and background color places high demands on image recognition software. {SIPBILD} succeeds in recognizing human facial expressions and gestures with model-based image interpretation. To apply this technique in natural environments, however, the existing techniques have to be improved substantially. In particular, we present an approach that achieves robust model fitting without specialized expertise in image processing, so that using this technique no longer requires an expert.},
}

@article{wimmer_recognizing_2008-1,
  author = {M Wimmer and Z Riaz and C Mayer and B Radig},
  title = {Recognizing Facial Expressions Using Model-based Image Interpretation},
  journal = {Advances in Human-Computer Interaction},
  year = {2008},
  volume = {1},
  pages = {587--600},
  month = {oct},
  editor = {Pinder, Shane},
  keywords = {facial expressions},
}

@inproceedings{wimmer_low-level_2008,
  author = {M Wimmer and B Schuller and D Arsic and B Radig and G Rigoll},
  title = {Low-level Fusion of Audio and Video Feature for Multi-modal Emotion Recognition},
  booktitle = {3rd International Conference on Computer Vision Theory and Applications ({VISAPP})},
  year = {2008},
  volume = {2},
  pages = {145--151},
  address = {Madeira, Portugal},
  month = {jan},
  abstract = {Bimodal emotion recognition through audiovisual feature fusion has been shown to be superior to each individual modality in the past.
Still, synchronization of the two streams is a challenge, as many vision approaches work on a frame basis, whereas audio is processed on a turn or chunk basis. Therefore, late fusion schemes such as simple logic or voting strategies are commonly used for the overall estimation of the underlying affect. However, early fusion is known to be more effective in many other multimodal recognition tasks. We therefore suggest a combined analysis by descriptive statistics of audio and video low-level descriptors for subsequent static {SVM} classification. This strategy also allows for a combined feature-space optimization, which will be discussed herein. The high effectiveness of this approach is shown on a database of 11.5 hours containing six emotional situations in an airplane scenario.},
}

@article{wimmer_learning_2008,
  author = {M Wimmer and F Stulp and S Pietzsch and B Radig},
  title = {Learning Local Objective Functions for Robust Face Model Fitting},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence ({PAMI})},
  year = {2008},
  volume = {30},
  number = {8},
  pages = {1357--1370},
  doi = {10.1109/TPAMI.2007.70793},
  issn = {0162-8828},
  keywords = {facial expressions},
}

@inproceedings{wimmer_enabling_2007,
  author = {M Wimmer and F Stulp and B Radig},
  title = {Enabling Users to Guide the Design of Robust Model Fitting Algorithms},
  booktitle = {Workshop on Interactive Computer Vision, held in conjunction with {ICCV} 2007},
  year = {2007},
  pages = {28},
  address = {Rio de Janeiro, Brazil},
  month = {oct},
  publisher = {Omnipress},
  abstract = {Model-based image interpretation extracts high-level information from images using a priori knowledge about the object of interest. The computational challenge in model fitting is to determine the model parameters that best match a given image, which corresponds to finding the global optimum of the objective function. When it comes to the robustness and accuracy of fitting models to specific images, humans still outperform state-of-the-art model fitting systems. Therefore, we propose a method in which non-experts can guide the process of designing model fitting algorithms. In particular, this paper demonstrates how to obtain robust objective functions for face model fitting applications by learning their calculation rules from example images annotated by humans. We evaluate the obtained function using a publicly available image database and compare it to a recent state-of-the-art approach in terms of accuracy.},
  isbn = {978-1-4244-1631-8},
}

@inproceedings{wimmer_learning_2006,
  author = {M Wimmer and F Stulp and S Tschechne and B Radig},
  title = {Learning Robust Objective Functions for Model Fitting in Image Understanding Applications},
  booktitle = {Proceedings of the 17th British Machine Vision Conference ({BMVC})},
  year = {2006},
  editor = {Chantler, Michael J. and Trucco, Emanuel and Fisher, Robert B.},
  volume = {3},
  pages = {1159--1168},
  address = {Edinburgh, {UK}},
  month = {sep},
  publisher = {{BMVA}},
  abstract = {Model-based methods in computer vision have proven to be a good approach for compressing the large amount of information in images. Fitting algorithms search for those parameters of the model that optimise the objective function given a certain image. Although fitting algorithms have been the subject of intensive research and evaluation, the objective function is usually designed ad hoc and heuristically, with much implicit domain-dependent knowledge.
This paper formulates a set of requirements that robust objective functions should satisfy. Furthermore, we propose a novel approach that learns the objective function from training images that have been annotated with the preferred model parameters. The requirements are automatically enforced during the learning phase, which yields generally applicable objective functions. We compare the performance of our approach to other approaches. For this purpose, we propose a set of indicators that evaluate how well an objective function meets the stated requirements.},
}

@inproceedings{wimmer_human_2007,
  author = {M Wimmer and U Zucker and B Radig},
  title = {Human Capabilities on Video-based Facial Expression Recognition},
  booktitle = {Proceedings of the 2nd Workshop on Emotion and Computing – Current Research and Future Impact},
  year = {2007},
  editor = {Reichardt, Dirk and Levi, Paul},
  pages = {7--10},
  address = {Osnabrück, Germany},
  month = {sep},
  abstract = {A lot of promising computer vision research has been conducted during the last decade in order to automatically recognize facial expressions. Some of these approaches achieve high accuracy; however, it has not yet been investigated how accurately humans accomplish this task, which would provide a comparable measure. We therefore conducted a survey on this issue, and this paper evaluates the gathered information regarding the recognition rates and the confusion of facial expressions.},
  keywords = {facial expressions},
}