% RoboTAP conference paper (ICRA proceedings, 2024).
% The note field records that the archived copy is the author-accepted manuscript.
% The doi field holds the bare identifier; url keeps the resolver link for styles without DOI support.
@inproceedings{discovery10196975,
  author    = {Vecerik, Mel and Doersch, Carl and Yang, Yi and Davchev, Todor and Aytar, Yusuf and Zhou, Guangyao and Hadsell, Raia and Agapito, Lourdes and Scholz, Jon},
  title     = {{RoboTAP}: Tracking Arbitrary Points for Few-Shot Visual Imitation},
  booktitle = {Proceedings - {IEEE} International Conference on Robotics and Automation},
  series    = {{IEEE} International Conference on Robotics and Automation ({ICRA})},
  volume    = {2024},
  pages     = {5397--5403},
  publisher = {IEEE},
  address   = {Yokohama, Japan},
  year      = {2024},
  month     = aug,
  issn      = {1050-4729},
  doi       = {10.1109/ICRA57147.2024.10611409},
  url       = {https://doi.org/10.1109/ICRA57147.2024.10611409},
  keywords  = {Training, Visualization, Tracking, Stacking, Production facilities, Planning, Task analysis},
  note      = {This version is the author-accepted manuscript. For information on re-use, please refer to the publisher's terms and conditions.},
  abstract  = {For robots to be useful outside labs and specialized factories we need a way to teach them new useful behaviors quickly. Current approaches lack either the generality to onboard new tasks without task-specific engineering, or else lack the data-efficiency to do so in an amount of time that enables practical use. In this work we explore dense tracking as a representational vehicle to allow faster and more general learning from demonstration. Our approach utilizes Track-Any-Point (TAP) models to isolate the relevant motion in a demonstration, and parameterize a low-level controller to reproduce this motion across changes in the scene configuration. We show this results in robust robot policies that can solve complex object-arrangement tasks such as shape-matching, stacking, and even full path-following tasks such as applying glue and sticking objects together, all from demonstrations that can be collected in minutes.},
}