This file was created with JabRef 1.4.
Encoding: GBK

% ---------------------------------------------------------------------------
% Abbreviation macros.
% Reference these unquoted from entries (e.g. booktitle = cvpr) so that a
% venue name only ever has one spelling.  The file content below is plain
% ASCII: accented characters are written as LaTeX escapes, so it reads the
% same under any 8-bit encoding.
% ---------------------------------------------------------------------------

% Conferences and proceedings
@STRING{ijcai = {Proc.\ of the 7th International Joint Conference on Artificial Intelligence}}
@STRING{sig = {ACM Computer Graphics (SIGGRAPH)}}
@STRING{acmtog = {ACM Transactions on Graphics}}
@STRING{bmvc = {Proc.\ BMVC}}
@STRING{cviu = {Computer Vision and Image Understanding (CVIU)}}
@STRING{cgip = {Computer Graphics, Vision, and Image Processing}}
@STRING{cvgip1 = {CVGIP: Image Understanding}}
@STRING{cvgip2 = {CVGIP: Graphical Models and Image Processing}}
@STRING{cvpr = {Proc.\ IEEE Conf.\ on Computer Vision and Pattern Recognition (CVPR)}}
@STRING{eccv = {Proc.\ European Conf.\ on Computer Vision (ECCV)}}
@STRING{accv = {Proc.\ Asian Conf.\ on Computer Vision (ACCV)}}
@STRING{cvprTech = {Proc.\ IEEE Conf.\ on Computer Vision and Pattern Recognition (CVPR), Technical Sketches}}
@STRING{iccv = {Proc.\ IEEE International Conf.\ on Computer Vision (ICCV)}}
@STRING{ivc = {Image and Vision Computing}}
@STRING{ijcv = {International Journal of Computer Vision (IJCV)}}
@STRING{icip = {IEEE International Conference on Image Processing (ICIP)}}
@STRING{fgr = {IEEE International Conference on Face and Gesture Recognition (FGR)}}
@STRING{icpr = {IEEE International Conference on Pattern Recognition (ICPR)}}
@STRING{icme = {IEEE International Conference on Multimedia and Expo (ICME)}}

% Journals
@STRING{josa = {JOSA}}
@STRING{josaa = {JOSA-A}}
@STRING{mva = {Machine Vision and Applications}}
@STRING{pire = {Proc.\ of IRE}}
@STRING{pieee = {Proc.\ of the IEEE}}
@STRING{tassp = {IEEE Trans.\ Acoustics, Speech, and Signal Processing}}
@STRING{tinfo = {IEEE Trans.\ Information Theory}}
@STRING{tmedi = {IEEE Trans.\ Medical Imaging}}
@STRING{tip = {IEEE Trans.\ Image Processing}}
@STRING{tpami = {IEEE Trans.\ on Pattern Analysis and Machine Intelligence (PAMI)}}
@STRING{pami = tpami}
@STRING{tsp = {IEEE Trans.\ on Signal Processing}}
@STRING{prl = {Pattern Recognition Letters}}
@STRING{pr = {Pattern Recognition}}
@STRING{tsmc = {IEEE Trans.\ Systems, Man, and Cybernetics}}
@STRING{tcomp = {IEEE Trans.\ Computers}}
@STRING{tcomm = {IEEE Trans.\ Communications}}
@STRING{tcirc = {IEEE Trans.\ Circuits and Systems}}
@STRING{tams = {Trans.\ American Mathematical Society}}
@STRING{amm = {American Mathematical Monthly}}
@STRING{bstj = {Bell Syst.\ Tech.\ J.}}
@STRING{sfp = {Submitted for publication}}
@STRING{sp = {Signal Processing}}
@STRING{trobo = {IEEE Trans.\ Robotics and Automation}}

% Month names.  Standard styles predefine jan..dec; these definitions are
% kept so the file also works with styles that do not.
@STRING{jan = {January}}
@STRING{feb = {February}}
@STRING{mar = {March}}
@STRING{apr = {April}}
@STRING{may = {May}}
@STRING{jun = {June}}
@STRING{jul = {July}}
@STRING{aug = {August}}
@STRING{sep = {September}}
@STRING{oct = {October}}
@STRING{nov = {November}}
@STRING{dec = {December}}

% Was defined as the empty string, which left every entry using it without
% a booktitle.
@STRING{eurograph = {Proc.\ Eurographics}}

% ---------------------------------------------------------------------------
% Entries, sorted by citation key.
% Conventions: authors as "Last, First" joined by " and "; page ranges with
% "--"; words that must keep their case in titles are wrapped in braces.
% ---------------------------------------------------------------------------

@ARTICLE{Ahlberg2002,
  author = {Ahlberg, J.},
  title = {An active model for facial feature tracking},
  journal = {EURASIP Journal on Applied Signal Processing},
  year = {2002},
  number = {6},
  pages = {566--571},
  owner = {lir},
}

@ARTICLE{Alvarez2000,
  author = {Alvarez, L. and Weickert, J. and S{\'a}nchez, J.},
  title = {Reliable Estimation of Dense Optical Flow Fields with Large Displacements},
  journal = ijcv,
  year = {2000},
  volume = {39},
  number = {1},
  pages = {41--56},
}

@ARTICLE{Alvarez2001,
  author = {Alvarez, L. and Deriche, R. and Weickert, J. and S{\'a}nchez, J.},
  title = {Dense Disparity Map Estimation Respecting Image Discontinuities: A {PDE} and Scale-Space Based Approach},
  journal = {International Journal of Visual Communication and Image Representation},
  year = {2001},
  volume = {13},
  pages = {3--21},
  abstract = {We present an energy based approach to estimate a dense disparity map between two images while preserving its discontinuities resulting from image boundaries. We first derive a simplified expression for the disparity that allows us to easily estimate it from a stereo pair of images using an energy minimization approach. We assume that the epipolar geometry is known, and we include this information in the energy model. Discontinuities are preserved by means of a regularization term based on the Nagel--Enkelmann operator. We investigate the associated Euler--Lagrange equation of the energy functional, and we approach the solution of the underlying partial differential equation (PDE) using a gradient descent method. In order to reduce the risk to be trapped within some irrelevant local minima during the iterations, we use a focusing strategy based on a linear scale-space. We prove the existence and uniqueness of the underlying parabolic partial differential equation. Experimental results on both synthetic and real images are presented to illustrate the capabilities of this PDE and scale-space based method.},
}

@ARTICLE{Aubert1999,
  author = {Aubert, G. and Deriche, R. and Kornprobst, P.},
  title = {Computing optical flow via variational techniques},
  journal = {SIAM Journal on Applied Mathematics},
  year = {1999},
  volume = {60},
  number = {1},
  pages = {156--182},
}

@INPROCEEDINGS{Avidan1998,
  author = {Avidan, S. and Shashua, A.},
  title = {Non-rigid parallax for {3D} linear motion},
  booktitle = cvpr,
  year = {1998},
  pages = {62--66},
}

@INPROCEEDINGS{Birchfield1998,
  author = {Birchfield, S.},
  title = {Elliptical Head Tracking Using Intensity Gradients and Color Histograms},
  booktitle = cvpr,
  year = {1998},
  abstract = {An elliptical head tracker has been developed that works as follows. When a new image becomes available, the tracker performs a local search to find the best position and size for the ellipse by maximizing the sum of two terms: one involving the dot product of the image gradient with the ellipse normal, and another involving the normalized histogram intersection between the color histogram of the ellipse's interior and a previously stored, personalized color histogram model. Despite the local search, velocity prediction removes any restriction on maximum lateral image velocity. In real time, the tracker is able to reliably and automatically control the camera's pan, tilt, and zoom in order to keep the subject centered in the field of view at a desired size. Compared with previous work, this head tracker is the only system of which I am aware that uses multiple tracking criteria and can handle full 360-degree out-of-plane rotation, large scale changes, arbitrary camera movement, and multiple moving people in the background},
  owner = {lir},
}

@INPROCEEDINGS{Black1991,
  author = {Black, M. and Anandan, P.},
  title = {Robust dynamic motion estimation over time},
  booktitle = cvpr,
  year = {1991},
  pages = {292--302},
}

@INPROCEEDINGS{Black1995,
  author = {Black, M. and Yacoob, Y.},
  title = {Tracking and recognizing rigid and non-rigid facial motions using local parametric models of image motion},
  booktitle = iccv,
  year = {1995},
  pages = {374--381},
}

@ARTICLE{Black1996,
  author = {Black, M. and Anandan, P.},
  title = {The robust estimation of multiple motions: Parametric and piecewise smooth flow fields},
  journal = cviu,
  year = {1996},
  volume = {63},
  number = {1},
  pages = {75--104},
}

@ARTICLE{Black1996a,
  author = {Black, M. and Jepson, A.},
  title = {Estimating optical flow in segmented images using variable-order parametric models with local deformations},
  journal = pami,
  year = {1996},
  volume = {18},
  number = {10},
  pages = {972--986},
}

@ARTICLE{Blanc-Feraud1993,
  author = {Blanc-Feraud, L. and Barlaud, M. and Gaidon, T.},
  title = {Motion estimation involving discontinuities in a multiresolution scheme},
  journal = {Optical Engineering},
  year = {1993},
  volume = {32},
  number = {7},
  pages = {1475--1482},
}

@INPROCEEDINGS{Blanz1999,
  author = {Blanz, V. and Vetter, T.},
  title = {A morphable model for the synthesis of {3D} faces},
  booktitle = sig,
  year = {1999},
  pages = {187--194},
  abstract = {In this paper, a new technique for modeling textured 3D faces is introduced. 3D faces can either be generated automatically from one or more photographs, or modeled directly through an intuitive user interface. Users are assisted in two key problems of computer aided face modeling. First, new face images or new 3D face models can be registered automatically by computing dense one-to-one correspondence to an internal face model. Second, the approach regulates the naturalness of modeled faces avoiding faces with an ``unlikely'' appearance. Starting from an example set of 3D face models, we derive a morphable face model by transforming the shape and texture of the examples into a vector space representation. New faces and expressions can be modeled by forming linear combinations of the prototypes. Shape and texture constraints derived from the statistics of our example faces are used to guide manual modeling or automated matching algorithms. We show 3D face reconstructions from single images and their applications for photo-realistic image manipulations. We also demonstrate face manipulations according to complex parameters such as gender, fullness of a face or its distinctiveness.},
}

@INPROCEEDINGS{Blanz2003,
  author = {Blanz, V. and Basso, C. and Poggio, T. and Vetter, T.},
  title = {Reanimating Faces in Images and Video},
  booktitle = eurograph,
  year = {2003},
  abstract = {This paper presents a method of photo-realistic animation that can be applied to any face shown in a single image or a video. The technique does not require example data of the person's mouth movements, and the image to be animated is not restricted in pose or illumination. Video reanimation allows for head rotations and speech in the original sequence, but neither of these motions is required. In order to animate novel faces, the system transfers mouth movements and expressions across individuals, based on a common representation of different faces and facial expressions in a vector space of 3D shapes and textures. This space is computed from 3D scans of neutral faces, and scans of facial expressions. The 3D model's versatility with respect to pose and illumination is conveyed to photo-realistic image and video processing by a framework of analysis and synthesis algorithms: The system automatically estimates 3D shape and all relevant rendering parameters, such as pose, from single images. In video, head pose and mouth movements are tracked automatically. Reanimated with new mouth movements, the 3D face is rendered into the original images.},
}

@INPROCEEDINGS{Brand2001,
  author = {Brand, M.},
  title = {Morphable {3D} models from video},
  booktitle = cvpr,
  year = {2001},
}

@INPROCEEDINGS{Brand2001a,
  author = {Brand, M. and Bhotika, R.},
  title = {Flexible flow for {3D} nonrigid tracking and shape recovery},
  booktitle = cvpr,
  year = {2001},
  pages = {315--322},
  owner = {lir},
}

@ARTICLE{Cascia2000,
  author = {La Cascia, M. and Sclaroff, S. and Athitsos, V.},
  title = {Fast, reliable head tracking under varying illumination: An approach based on registration of texture-mapped {3D} models},
  journal = pami,
  year = {2000},
  owner = {lir},
}

@INPROCEEDINGS{Cohen1993,
  author = {Cohen, I.},
  title = {Nonlinear variational method for optical flow computation},
  booktitle = {Proc.\ Scandinavian Conf.\ on Image Anal.},
  year = {1993},
  volume = {1},
  pages = {525--530},
}

@INPROCEEDINGS{Comaniciu1999,
  author = {Comaniciu, D. and Meer, P.},
  title = {Mean Shift Analysis and Applications},
  booktitle = iccv,
  year = {1999},
}

@ARTICLE{Cootes1995,
  author = {Cootes, T. F. and Taylor, C. J. and Cooper, D. H. and Graham, J.},
  title = {Active Shape Models---Their Training and Application},
  journal = cviu,
  year = {1995},
  abstract = {Model-based vision is firmly established as a robust approach to recognizing and locating known rigid objects in the presence of noise, clutter, and occlusion. It is more problematic to apply model-based methods to images of objects whose appearance can vary, though a number of approaches based on the use of flexible templates have been proposed. The problem with existing methods is that they sacrifice model specificity in order to accommodate variability, thereby compromising robustness during image interpretation. We argue that a model should only be able to deform in ways characteristic of the class of objects it represents. We describe a method for building models by learning patterns of variability from a training set of correctly annotated images. These models can be used for image search in an interactive refinement algorithm analogous to that employed by Active Contour Models (Snakes). The key difference is that our Active Shape Models can only deform to fit the data in ways consistent with the training set. We show several practical examples where we have built such models and used them to locate partially occluded objects in noisy, cluttered images.},
}

@INPROCEEDINGS{Cootes1998,
  author = {Cootes, T. and Edwards, G. and Taylor, C.},
  title = {Active Appearance Models},
  booktitle = eccv,
  year = {1998},
  abstract = {We demonstrate a novel method of interpreting images using an Active Appearance Model (AAM). An AAM contains a statistical model of the shape and grey-level appearance of the object of interest which can generalise to almost any valid example. During a training phase we learn the relationship between model parameter displacements and the residual errors induced between a training image and a synthesised model example. To match to an image we measure the current residuals and use the model to predict changes to the current parameters, leading to a better fit. A good overall match is obtained in a few iterations, even from poor starting estimates. We describe the technique in detail and give results of quantitative performance tests. We anticipate that the AAM algorithm will be an important method for locating deformable objects in many applications.},
}

@ARTICLE{Costeira1998,
  author = {Costeira, J. P. and Kanade, T.},
  title = {A multibody factorization method for independently moving objects},
  journal = ijcv,
  year = {1998},
  volume = {29},
  number = {3},
  pages = {159--179},
}

@INPROCEEDINGS{Davis2003,
  author = {Davis, J. and Ramamoorthi, R. and Rusinkiewicz, S.},
  title = {Spacetime Stereo: A Unifying Framework for Depth from Triangulation},
  booktitle = cvpr,
  year = {2003},
}

@INPROCEEDINGS{DeCarlo1996,
  author = {DeCarlo, D. and Metaxas, D.},
  title = {The integration of optical flow and deformable models with applications to human face shape and motion estimation},
  booktitle = cvpr,
  year = {1996},
  pages = {231--238},
}

@ARTICLE{DeCarlo2000,
  author = {DeCarlo, D. and Metaxas, D.},
  title = {Optical Flow Constraints on Deformable Models with Applications to Face Tracking},
  journal = ijcv,
  year = {2000},
  volume = {38},
  number = {2},
  pages = {99--127},
  month = jul,
  abstract = {Optical flow provides a constraint on the motion of a deformable model. We derive and solve a dynamic system incorporating flow as a hard constraint, producing a model-based least-squares optical flow solution. Our solution also ensures the constraint remains satisfied when combined with edge information, which helps combat tracking error accumulation. Constraint enforcement can be relaxed using a Kalman filter, which permits controlled constraint violations based on the noise present in the optical flow information, and enables optical flow and edge information to be combined more robustly and efficiently. We apply this framework to the estimation of face shape and motion using a 3D deformable face model. This model uses a small number of parameters to describe a rich variety of face shapes and facial expressions. We present experiments in extracting the shape and motion of a face from image sequences which validate the accuracy of the method. They also demonstrate that our treatment of optical flow as a hard constraint, as well as our use of a Kalman filter to reconcile these constraints with the uncertainty in the optical flow, are vital for improving the performance of our system.},
  comment = {deformable model-based framework; optical flow as hard constraints, combined with edge force through iterated EKF so that noisy flow constraints are relaxed;},
}

@INPROCEEDINGS{Deriche1995,
  author = {Deriche, R. and Kornprobst, P. and Aubert, G.},
  title = {Optical-flow estimation while preserving its discontinuities: A variational approach},
  booktitle = accv,
  year = {1995},
  volume = {2},
  pages = {290--295},
}

@PHDTHESIS{Essa1995,
  author = {Essa, I. A.},
  title = {Analysis, Interpretation and Synthesis of Facial Expressions},
  school = {Massachusetts Institute of Technology},
  year = {1995},
  abstract = {This thesis describes a computer vision system for observing the ``action units'' of a face using video sequences as input. The visual observation (sensing) is achieved by using an optimal estimation optical flow method coupled with a geometric and a physical (muscle) model describing the facial structure. This modeling results in a time-varying spatial patterning of facial shape and a parametric representation of the independent muscle action groups responsible for the observed facial motions. These muscle action patterns are then used for analysis, interpretation, recognition, and synthesis of facial expressions. Thus, by interpreting facial motions within a physics-based optimal estimation framework, a new control model of facial movement is developed. The newly extracted action units (which we name ``FACS+'') are both physics and geometry-based, and extend the well known FACS parameters for facial expressions by adding temporal information and non-local spatial patterning of facial motion.},
}

@INPROCEEDINGS{Essa1995a,
  author = {Essa, I. and Pentland, A.},
  title = {Facial Expression Recognition using a Dynamic Model and Motion Energy},
  booktitle = iccv,
  year = {1995},
}

@INPROCEEDINGS{Essa1996,
  author = {Essa, I. and Basu, S. and Darrell, T. and Pentland, A.},
  title = {Modeling, tracking and interactive animation of faces and heads using input from video},
  booktitle = {Proc.\ of Computer Graphics},
  year = {1996},
  abstract = {We describe tools that use measurements from video for the extraction of facial modeling and animation parameters, head tracking, and real-time interactive facial animation. These tools share common goals but rely on varying details of physical and geometric modeling and in their input measurement system. Accurate facial modeling involves fine details of geometry and muscle coarticulation. By coupling pixel-by-pixel measurements of surface motion to a physically-based face model and a muscle control model, we have been able to obtain detailed spatio-temporal records of both the displacement of each point on the facial surface and the muscle control required to produce the observed facial motion. We will discuss the importance of this visually extracted representation in terms of realistic facial motion synthesis. A similar method that uses an ellipsoidal model of the head coupled with detailed estimates of visual motion allows accurate tracking of head motion in 3-D. Additionally, by coupling sparse, fast visual measurements with our physically-based model via an interpolation process, we have produced a real-time interactive facial animation/mimicking system.},
}

@INPROCEEDINGS{Fieguth1997,
  author = {Fieguth, P. and Terzopoulos, D.},
  title = {Color based tracking of heads and other mobile objects at video frame rates},
  booktitle = cvpr,
  year = {1997},
  abstract = {We develop a simple and very fast method for object tracking based exclusively on color information in digitized video images. Running on a Silicon Graphics R4600 Indy system with an IndyCam, our algorithm is capable of simultaneously tracking objects at full frame size (640 $\times$ 480 pixels) and video frame rate (30 fps). Robustness with respect to occlusion is achieved via an explicit hypothesis-tree model of the occlusion process. We demonstrate the efficacy of our technique in the challenging task of tracking people, especially tracking human heads and hands.},
}

@ARTICLE{Geman1984,
  author = {Geman, S. and Geman, D.},
  title = {Stochastic Relaxation, {Gibbs} Distributions, and the {Bayesian} Restoration of Images},
  journal = pami,
  year = {1984},
  volume = {6},
  pages = {721--741},
}

@INPROCEEDINGS{Gokturk2001,
  author = {Gokturk, S. and Bouguet, J.-Y. and Grzeszczuk, R.},
  title = {A Data-Driven Model for Monocular Face Tracking},
  booktitle = iccv,
  year = {2001},
  owner = {lir},
}

@INPROCEEDINGS{Gokturk2002,
  author = {Gokturk, S. and Bouguet, J.-Y. and Tomasi, C. and Girod, B.},
  title = {Model-Based Face Tracking for View-Independent Facial Expression Recognition},
  booktitle = fgr,
  year = {2002},
  abstract = {Facial expression recognition is necessary for designing any realistic human-machine interfaces. Previous published facial expression recognition systems achieve good recognition rates, but most of them perform well only when the user faces the camera and does not change his 3D head pose. In this study, we propose a new method for robust, view-independent recognition of facial expressions that does not make this assumption. The system uses a novel 3D model-based tracker to extract simultaneously and robustly the pose and shape of the face at every frame of a monocular video sequence. There are two main contributions of this paper. First, we demonstrate that the 3D information extracted through 3D tracking enables robust facial expression recognition in spite of large rotational and translational head movements (up to 90 degrees in head rotation). Second, we show that Support Vector Machine is a suitable engine for robust classification. Recognition rates as high as 91 percent are achieved at classifying 5 distinct dynamic facial motions (neutral, opening/closing mouth, smile, raising eyebrow).},
}

@INPROCEEDINGS{Goldenstein2004,
  author = {Goldenstein, S. and Vogler, C. and Metaxas, D.},
  title = {{3D} Facial Tracking from Corrupted Movie Sequences},
  booktitle = cvpr,
  year = {2004},
  abstract = {In this paper we perform 3D face tracking on corrupted video sequences. We use a deformable model, combined with a predictive filter, to recover both the rigid transformations and the values of the parameters that describe the evolution of the facial expressions over time. To be robust, predictive filters need a good observation of the system's state. We describe a new method to measure at each moment in time, the correct distribution of an observation of the parameters of a high-dimensional deformable model. This method is based on bounding the confidence regions of the 2D image displacement with affine forms, and propagating them into parameter space. Using Lindeberg's theorem, we measure a good Gaussian approximation of the parameters in a manner that avoids many of the traditional assumptions about the observations' distributions. We demonstrate in experiments on sequences with compression artifacts, and poor-quality video sequences of Lauren Bacall and Humphrey Bogart from the 1950s, that, without any learning involved, our method is sufficiently robust to extract information from degraded image sequences. In addition, we provide ground truth validation.},
}

@INPROCEEDINGS{Guichard1996,
  author = {Guichard, F. and Rudin, L.},
  title = {Accurate estimation of discontinuous optical flow by minimizing divergence related functionals},
  booktitle = icip,
  year = {1996},
  volume = {1},
  pages = {497--500},
}

@ARTICLE{Hager1998,
  author = {Hager, G. and Belhumeur, P.},
  title = {Efficient Region Tracking With Parametric Models of Geometry and Illumination},
  journal = pami,
  year = {1998},
  volume = {20},
  number = {10},
  pages = {1125--1139},
  abstract = {As an object moves through the field of view of a camera, the images of the object may change dramatically. This is not simply due to the translation of the object across the image plane. Rather, complications arise due to the fact that the object undergoes changes in pose relative to the viewing camera, changes in illumination relative to light sources, and may even become partially or fully occluded. In this paper, we develop an efficient, general framework for object tracking---one which addresses each of these complications. We first develop a computationally efficient method for handling the geometric distortions produced by changes in pose. We then combine geometry and illumination into an algorithm that tracks large image regions using no more computation than would be required to track with no accommodation for illumination changes. Finally, we augment these methods with techniques from robust statistics and treat occluded regions on the object as statistical outliers. Throughout, we present experimental results performed on live video sequences demonstrating the effectiveness and efficiency of our methods.},
}

@ARTICLE{Heeger1987,
  author = {Heeger, D. J.},
  title = {Model for the extraction of image flow},
  journal = {Journal of the Optical Society of America A},
  year = {1987},
  volume = {4},
  number = {8},
  pages = {1455--1471},
}

@ARTICLE{Heitz1993,
  author = {Heitz, E. and Bouthemy, P.},
  title = {Multimodal estimation of discontinuous optical flow using {Markov} random fields},
  journal = pami,
  year = {1993},
  volume = {15},
  number = {12},
  pages = {1217--1232},
}

@ARTICLE{Horn1981,
  author = {Horn, B. and Schunck, B.},
  title = {Determining optical flow},
  journal = {Artificial Intelligence},
  year = {1981},
  volume = {17},
  number = {1-3},
  pages = {185--203},
}

@INPROCEEDINGS{Horprasert1996,
  author = {Horprasert, A. and Yacoob, Y. and Davis, L.},
  title = {Computing {3D} head orientation from a monocular image sequence},
  booktitle = {Proc.\ International Conf.\ Automatic Face and Gesture Recognition},
  year = {1996},
}

@INPROCEEDINGS{Huang2000,
  author = {Huang, F. and Chen, T.},
  title = {Tracking of multiple faces for human-computer interfaces and virtual environments},
  booktitle = icme,
  year = {2000},
}

@INPROCEEDINGS{Jebara1997,
  author = {Jebara, Tony S. and Pentland, Alex},
  title = {Parametrized Structure from Motion for {3D} Adaptive Feedback Tracking of Faces},
  booktitle = cvpr,
  year = {1997},
  abstract = {A real-time system is described for automatically detecting, modeling and tracking faces in 3D. A closed loop approach is proposed which utilizes structure from motion to generate a 3D model of a face and then feed back the estimated structure to constrain feature tracking in the next frame. The system initializes by using skin classification, symmetry operations, 3D warping and eigenfaces to find a face. Feature trajectories are then computed by SSD or correlation-based tracking. The trajectories are simultaneously processed by an extended Kalman filter to stably recover 3D structure, camera geometry and facial pose. Adaptively weighted estimation is used in this filter by modeling the noise characteristics of the 2D image patch tracking technique. In addition, the structural estimate is constrained by using parametrized models of facial structure (eigen-heads). The Kalman filter's estimate of the 3D state and motion of the face predicts the trajectory of the features which constrains the search space for the next frame in the video sequence. The feature tracking and Kalman filtering closed loop system operates at 25Hz.},
}

@ARTICLE{Kumar1996,
  author = {Kumar, A. and Tannenbaum, A. and Balas, G.},
  title = {Optic flow: a curve evolution approach},
  journal = tip,
  year = {1996},
  volume = {5},
  number = {4},
  pages = {598--610},
}

@ARTICLE{Li1993,
  author = {Li, H. and Roivainen, P. and Forchheimer, R.},
  title = {{3D} motion estimation in model based facial image coding},
  journal = pami,
  year = {1993},
  volume = {15},
  number = {6},
  pages = {545--555},
  abstract = {An approach to estimating the motion of the head and facial expressions in model-based facial image coding is presented. An affine nonrigid motion model is set up. The specific knowledge about facial shape and facial expression is formulated in this model in the form of parameters. A direct method of estimating the two-view motion parameters that is based on the affine method is discussed. Based on the reasonable assumption that the 3-D motion of the face is almost smooth in the time domain, several approaches to predicting the motion of the next frame are proposed. Using a 3-D model, the approach is characterized by a feedback loop connecting computer vision and computer graphics. Embedding the synthesis techniques into the analysis phase greatly improves the performance of motion estimation. Simulations with long image sequences of real-world scenes indicate that the method not only greatly reduces computational complexity but also substantially improves estimation accuracy.},
}

@ARTICLE{Liao1997,
  author = {Liao, W.-H. and Aggarwal, S. J. and Aggarwal, J. K.},
  title = {The reconstruction of dynamic {3D} structure of biological objects using stereo microscope images},
  journal = mva,
  year = {1997},
  volume = {9},
  pages = {166--178},
}

@INPROCEEDINGS{Lucas1981,
  author = {Lucas, B. D. and Kanade, T.},
  title = {An iterative image registration technique with an application to stereo vision},
  booktitle = ijcai,
  year = {1981},
  pages = {674--679},
}

@ARTICLE{Malassiotis1997,
  author = {Malassiotis, S. and Strintzis, M. G.},
  title = {Model-based joint motion and structure estimation from stereo images},
  journal = cviu,
  year = {1997},
  volume = {65},
  number = {1},
  pages = {79--94},
  abstract = {In this paper we integrate motion and structure estimation in order to exploit their coherence. We propose an algorithm that uses models for object surfaces and their motion and estimates the model parameters using the image intensity matching criterion. The visible scene surface is represented with a parametrically deformable, spatially adaptive, wireframe model. Object motion is first modeled using the well-known rigid motion assumption along with the quaternion rotation representation. Nonrigid motion modeling using the finite element technique is also investigated as an alternative to rigid motion modeling or as a refinement of it. A functional containing image matching and surface smoothness constraints is minimized with respect to the unknown model parameters. A discontinuity detection scheme allowing deactivation of smoothness constraints across object boundaries, is investigated. A regularization scheme using a coarse to fine strategy is employed.},
}

@ARTICLE{Memin1998,
  author = {M{\'e}min, E. and P{\'e}rez, P.},
  title = {Dense estimation and object-based segmentation of the optical flow with robust techniques},
  journal = tip,
  year = {1998},
  volume = {7},
  pages = {703--719},
}

@ARTICLE{Metaxas1993,
  author = {Metaxas, D. and Terzopoulos, D.},
  title = {Shape and nonrigid motion estimation through physics-based synthesis},
  journal = pami,
  year = {1993},
  volume = {15},
  number = {6},
  pages = {580--591},
}

@ARTICLE{Nagel1986,
  author = {Nagel, H.-H. and Enkelmann, W.},
  title = {An investigation of smoothness constraints for the estimation of displacement vector fields from image sequences},
  journal = pami,
  year = {1986},
  volume = {8},
  pages = {565--593},
}

@INPROCEEDINGS{Nastar1996,
  author = {Nastar, C. and Moghaddam, B. and Pentland, A.},
  title = {Generalized image matching: Statistical learning of physically-based deformations},
  booktitle = eccv,
  year = {1996},
  pages = {589--598},
}

@ARTICLE{Nesi1993,
  author = {Nesi, P.},
  title = {Variational approach to optical flow estimation managing discontinuities},
  journal = ivc,
  year = {1993},
  volume = {11},
  pages = {419--439},
}

@ARTICLE{Nesi1996,
  author = {Nesi, P. and Magnolfi, R.},
  title = {Tracking and Synthesizing Facial Motions with Dynamic Contours},
  journal = {Real-Time Imaging},
  year = {1996},
  abstract = {Many researchers have studied techniques related to the analysis and synthesis of human heads under motion with face deformations. These techniques can be used for defining low-rate image compression algorithms (model-based image coding), cinema technologies, video-phones, as well as for applications of virtual reality, etc. Such techniques need a real-time performance and a strong integration between the mechanisms of motion estimation and those of rendering and animation of the 3D synthetic head/face. In this paper, a complete and integrated system for tracking and synthesizing facial motions in real-time with low-cost architectures is presented. Facial deformations curves represented as spatiotemporal B-splines are used for tracking in order to model the main facial features. In addition, the system proposed is capable of adapting a generic 3D wire-frame model of a head/face to the face that must be tracked; therefore, the simulations of the face deformations are produced by using a realistic patterned face.},
}

@INPROCEEDINGS{Oliver1997,
  author = {Oliver, N. and Pentland, A. and B{\'e}rard, F.},
  title = {{LAFTER}: Lips and Face Real Time Tracker},
  booktitle = cvpr,
  year = {1997},
  abstract = {This paper describes an active-camera real-time system for tracking, shape description, and classification of the human face and mouth using only an SGI Indy computer. The system is based on use of 2-D blob features, which are spatially-compact clusters of pixels that are similar in terms of low-level image properties. Patterns of behavior (e.g., facial expressions and head movements) can be classified in real-time using Hidden Markov Model (HMM) methods. The system has been tested on hundreds of users and has demonstrated extremely reliable and accurate performance. Typical classification accuracies are near 100\%.},
}

@BOOK{Pearl1988,
  title = {Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference},
  publisher = {Morgan Kaufmann},
  year = {1988},
  author = {Pearl, J.},
}

@ARTICLE{Pentland1991,
  author = {Pentland, A. P. and Horowitz, B.},
  title = {Recovery of nonrigid motion and structure},
  journal = pami,
  year = {1991},
  volume = {13},
  number = {7},
  pages = {730--742},
}

@TECHREPORT{Pighin1997,
  author = {Pighin, F. and Auslander, J. and Lischinski, D. and Salesin, D. and Szeliski, R.},
  title = {Realistic Facial Animation Using Image-Based {3D} Morphing},
  institution = {University of Washington},
  year = {1997},
  number = {UW-CSE-97-01-03},
  abstract = {We present new techniques for creating a realistic textured 3D facial model from several photographs of a human subject and for performing facial animation by morphing between models corresponding to different facial expressions. Starting from several uncalibrated views of an individual, we employ a user assisted technique to recover the camera poses corresponding to the views, as well as the 3D coordinates of a sparse set of chosen locations on the individual's face. Having recovered the camera poses and the facial geometry, we extract from the input images a texture map for the model. An optical flow technique is used for improving the registration of the input images in texture space. This process is repeated for several facial expressions of a particular individual. To animate between these facial expressions we use 3D shape morphing between the corresponding facial models, while at the same time blending the corresponding textures. Using our technique we have been able to generate highly realistic facial models and natural looking transitions between different expressions.},
  comment = {initialization: manual marking of corresponding features => used to deform the 3D face mesh s.t. it has a closer correspondence to with face of the current human subject; then photos are texture mapped onto the human subject; employ an optical pixel flow technique for fine, subpixel registration of texture maps; face morphing is used for animation },
}

@INPROCEEDINGS{Pighin1998, author = {F. Pighin and J. Hecker and D. Lischinski and R. 
Szeliski and D.\ Salesin}, title = {Synthesizing realistic facial expressions from photographs}, booktitle = sig, year = {1998}, pages = {75 - 84}, } @INPROCEEDINGS{Pighin1999, author = {F.\ Pighin and R.\ Szeliski and D.\ Salesin}, title = {Resynthesizing facial animation through 3D model-based tracking}, booktitle = iccv, year = {1999}, } @ARTICLE{Scharstein2002, author = {D.\ Scharstein and R.\ Szeliski}, title = {A Taxonomy and Evaluation of Dense Two-Frame Correspondence}, journal = ijcv, year = {2002}, volume = {47}, pages = {7 - 42}, } @INPROCEEDINGS{Schnorr1994, author = {C.\ Schn$\ddot{o}$rr}, title = {Segmentation of visual motion by minimizing convex non-quadratic functions}, booktitle = icpr, year = {1994}, pages = {661 - 663}, } @INPROCEEDINGS{Schodl1998, author = {A.\ Sch$\ddot{o}$dl and A.\ Haro and I. A.\ Essa}, title = {Head Tracking Using a Textured Polygonal Model}, booktitle = {Workshop on Perceptual User Interfaces}, year = {1998}, abstract = {We describe the use of a three-dimensional textured model of the human head under perspective projection to track a person's face. The system is hand-initialized by projecting an image of the face onto a polygonal head model. Tracking is achieved by finding the six translation and rotation parameters to register the rendered images of the textured model with the video images. We find the parameters by mapping the derivative of the error with respect to the parameters to intensity gradients in the image. We use a robust estimator to pool the information and do gradient descent to find an error minimum.}, } @ARTICLE{Shi1994, author = {Y. Q.\ Shi and C. Q.\ Shu and J. N.\ Pan}, title = {Unified optical flow field approach to motion analysis from a sequence of stereo images}, journal = pr, year = {1994}, volume = {27}, number = {12}, pages = {1577 - 1590}, } @INPROCEEDINGS{Simoncelli1991, author = {E. P.\ Simoncelli and E. H.\ Adelson and D. 
J.\ Heeger}, title = {Probability Distributions of Optical Flow }, booktitle = cvpr, year = {1991}, } @INCOLLECTION{Simoncelli1999, author = {E. P.\ Simoncelli}, title = {Bayesian Multi-Scale Differential Optical Flow}, booktitle = {Handbook of Computer Vision and Applications}, publisher = {Academic Press}, year = {1999}, editor = {B. Jahne and H. Haussecker and P. Geissler}, chapter = {14}, pages = {397 - 422}, } @MISC{stereourl, author = {\url{http://www.middlebury.edu/stereo}}, } @ARTICLE{Szeliski1990, author = {R.\ Szeliski}, title = {Baysian Modeling of Uncertainty in Low-Level Vision}, journal = ijcv, year = {1990}, volume = {5}, number = {3}, pages = {271 - 301}, } @INPROCEEDINGS{Tao1999, author = {H.\ Tao and T.\ Huang}, title = {Explanation-based facial motion tracking using a piece-wised B$\acute{e}$zier volume deformation model}, booktitle = cvpr, year = {1999}, } @ARTICLE{Tao2002, author = {H.\ Tao and T.\ Huang}, title = {Visual Estimation and Compression of Facial Motion Parameters: Elements of a 3D Model-Based Video Coding System}, journal = ijcv, year = {2002}, volume = {50}, number = {2}, pages = {111-125}, abstract = {The MPEG4 standard supports the transmission and composition of facial animation with natural video by including a facial animation parameter (FAP) set that is defined based on the study of minimal facial actions and is closely related to muscle actions. The FAP set enables model-based representation of natural or synthetic talking head sequences and allows intelligible visual reproduction of facial expressions, emotions, and speech pronunciations at the receiver. This paper describes two key components we have developed for building a model-based video coding system: (1) a method for estimating FAP parameters based on our previously proposed piecewise B¨¦zier volume deformation model (PBVD), and (2) various methods for encoding FAP parameters. 
PBVD is a linear deformation model suitable for both the synthesis and the analysis of facial images. Each FAP parameter is a basis function in this model. Experimental results on PBVD-based animation, model-based tracking, and spatial-temporal compression of FAP parameters are demonstrated in this paper}, } @ARTICLE{Terzopulos1993, author = {D.\ Terzopulos and K.\ Water}, title = {Analysis and synthesis of facial image sequences using phsical and anatomical models}, journal = pami, year = {1993}, volume = {15}, number = {6}, pages = {569-579}, abstract = {An approach to the analysis of dynamic facial images for the purposes of estimating and resynthesizing dynamic facial expressions is presented. The approach exploits a sophisticated generative model of the human face originally developed for realistic facial animation. The face model which may be simulated and rendered at interactive rates on a graphics workstation, incorporates a physics-based synthetic facial tissue and a set of anatomically motivated facial muscle actuators. The estimation of dynamical facial muscle contractions from video sequences of expressive human faces is considered. An estimation technique that uses deformable contour models (snakes) to track the nonrigid motions of facial features in video images is developed. The technique estimates muscle actuator controls with sufficient accuracy to permit the face model to resynthesize transient expressions. }, } @INPROCEEDINGS{Torresani2001, author = {L.\ Torresani and D. B.\ Yang and E. J.\ Alexander and C.\ Bregler}, title = {Tracking and Modeling Non-Rigid Objects with Rank Constraints}, booktitle = cvpr, year = {2001}, abstract = {This paper presents a novel solution for flow-based tracking and 3D reconstruction of deforming objects in monocular image sequences. A non-rigid 3D object undergoing rotation and deformation can be effectively approximated using a linear combination of 3D basis shapes. 
This puts a bound on the rank of the tracking matrix. The rank constraint is used to achieve robust and precise low-level optical flow estimation without prior knowledge of the 3D shape of the object. The bound on the rank is also exploited to handle occlusion at the tracking level leading to the possibility of recovering the complete trajectories of occluded/disoccluded points. Following the same low-rank principle, the resulting flow matrix can be factored to get the 3D pose, configuration coefficients, and 3D basis shapes. The flow matrix is factored in an iterative manner, looping between solving for pose, configuration, and basis shapes. The flow-based tracking is applied to several video sequences and provides the input to the 3D non-rigid reconstruction task. Additional results on synthetic data and comparisons to ground truth complete the experiments. }, } @BOOK{Ullman1979, title = {The interpretation of Visual Motion}, publisher = {MIT Press}, year = {1979}, author = {S.\ Ullman}, } @ARTICLE{Ullman1984, author = {S.\ Ullman}, title = {Maximizing the rigidity: The incremental recovery of 3-{D} shape and nonrigid motion}, journal = {Perception}, year = {1984}, volume = {13}, pages = {730 - 742}, } @INPROCEEDINGS{Vedula1999, author = {S.\ Vedula and S.\ Baker and P.\ Rander and R.\ Collins and T.\ Kanade}, title = {Three-Dimensional Scene Flow}, booktitle = iccv, year = {1999}, volume = {2}, pages = {722 - 729}, } @ARTICLE{Waxman1986, author = {A. M.\ Waxman and J. 
H.\ Duncan}, title = {Binocular image flows: Steps toward stereo-motion fusion}, journal = pami, year = {1986}, volume = {8}, number = {6}, pages = {715 - 729}, } @INPROCEEDINGS{Weickert1998, author = {J.\ Weickert}, title = {On discontinuity-preserving optic flow}, booktitle = {Proc.\ Computer Vision and Mobile Robotics Workshop ({CVMR}'98)}, year = {1998}, pages = {115 - 122}, } @INPROCEEDINGS{Wen2003, author = {Z.\ Wen and T.\ Huang}, title = {Capturing Subtile Facial Motions in 3D Face Tracking}, booktitle = iccv, year = {2003}, abstract = {Facial motions produce not only facial feature points motions, but also subtle appearance changes such as wrinkles and shading changes. These subtle changes are important yet difficult issues for both analysis (tracking) and synthesis(animation). Previous approaches were mostly based on models learned from extensive training appearance examples. However, the space of all possible facial motion appearance is huge. Thus, it is not feasible to collect samples covering all possible variations due to lighting conditions, individualities, and head poses. Therefore, it is difficult to adapt such models to new conditions. In this paper, we present an adaptive techniquefor analyzing subtle facial appearance changes. We propose a new ratio-image based appearance feature, which is independent of a person's face albedo. This feature is used to track face appearance model to new people and lighting conditions, we develop an online EM-based algorithm. Experiments show that the proposed method improves classification results in a facial experssion recognition task, where a variety of people and lighting conditions are involved.}, } @INPROCEEDINGS{Yacoob1994, author = {Y.\ Yacoob and L.\ Davis}, title = {Computing Spatio-Temporal Representations of Human Faces}, booktitle = cvpr, year = {1994}, abstract = {An approach for analysis and representation of facial dynamics for recognition of facial expressions from image sequences is proposed. 
The algorithms we develop utilize optical flow computation to identify the direction of rigid and non-rigid motions that are caused by human facial expressions. A mid-level symbolic representation that is motivated by linguistic and psychological considerations is developed. Recognition of six facial expressions, as well as eye blinking, on a large set of image sequences is reported.}, } @TECHREPORT{Yang2001, author = {R.-G.\ Yang and Z.-Y.\ Zhang}, title = {Model-based Head Pose Tracking with Stereovision}, institution = {Microsoft Research}, year = {2001}, number = {MSR-TR-2001-102}, } @ARTICLE{Young1999, author = {G. S.\ Young and R.\ Chellappa}, title = {3-{D} motion estimation using a sequence of noisy stereo images: Models, estimation, and uniqueness}, journal = pami, year = {1999}, volume = {12}, number = {8}, pages = {735 - 759}, } @ARTICLE{Zhang1992, author = {Z.\ Zhang and O.\ Faugeras}, title = {Estimation of displacements from two 3-{D} frames obtained from stereo}, journal = pami, year = {1992}, volume = {14}, number = {12}, pages = {1141 - 1156}, } @BOOK{Zhang1992a, title = {3{D} Dynamic Scene Analysis}, publisher = {Springer-Verlag}, year = {1992}, author = {Z.\ Zhang and O.\ Faugeras}, } @INPROCEEDINGS{Zhang2000, author = {Y.\ Zhang and C.\ Kambhamettu}, title = {Integrated 3{D} Scene Flow and Structure Recovery from Multiview Image Sequences}, booktitle = cvpr, year = {2000}, } @INPROCEEDINGS{Zhang2001, author = {Y.\ Zhang and C.\ Kambhamettu}, title = {On 3{D} Scene Flow and Structure Estimation}, booktitle = cvpr, year = {2001}, } @INPROCEEDINGS{Zhang2003, author = {L.\ Zhang and B.\ Curless and S. M.\ Seitz}, title = {Spacetime Stereo: Shape Recovery for Dynamic Scenes}, booktitle = cvpr, year = {2003}, } @INPROCEEDINGS{Zhang2004, author = {L.\ Zhang and N.\ Snavely and B.\ Curless and S. 
M.\ Steven}, title = {Spacetime Faces: High Resolution Capture for Modeling and Animation}, booktitle = sig, year = {2004}, abstract = {We present an end-to-end system that goes from video sequences to high resolution, editable, dynamically controllable face models. The capture system employs synchronized video cameras and structured light projectors to record videos of a moving face from multiple viewpoints. A novel spacetime stereo algorithm is introduced to compute depth maps accurately and overcome over-fitting deficiencies in prior work. A new template fitting and tracking procedure fills in missing data and yields point correspondence across the entire sequence without using markers. We demonstrate a data-driven, interactive method for inverse kinematics that draws on the large set of fitted templates and allows for posing new expressions by dragging surface points directly. Finally, we describe new tools that model the dynamics in the input sequence to enable new animations, created via key-framing or texture-synthesis techniques.}, keywords = {shape recovery, facial animation, stereo matching, shape registration, data-driven animation, expression analysis}, } @INPROCEEDINGS{, }