@inproceedings{discovery10178561,
  author    = {Long, Xiang and Beddow, Luke and Hadjivelichkov, Denis and Delfaki, Andromachi Maria and Wurdemann, Helge and Kanoulas, Dimitrios},
  title     = {Reinforcement Learning-based Grasping via One-Shot Affordance Localization and Zero-Shot Contrastive Language-Image Learning},
  booktitle = {Proceedings of the 2024 IEEE/SICE International Symposium on System Integration (SII)},
  publisher = {IEEE},
  address   = {Ha Long, Vietnam},
  month     = {January},
  year      = {2024},
  keywords  = {Location awareness, Affordances, Pipelines, Grasping, System integration, Robots, Videos},
  abstract  = {We present a novel robotic grasping system using a caging-style gripper that combines one-shot affordance localization and zero-shot object identification. We demonstrate an integrated system requiring minimal prior knowledge, focusing on flexible, few-shot, object-agnostic approaches. To grasp a novel target object, we use as input the color and depth of the scene, an image of an object affordance similar to the target object, and a text prompt of up to three words describing the target object. We demonstrate the system on real-world grasping of objects from the YCB benchmark set, with four distractor objects cluttering the scene. Overall, our pipeline achieves success rates of 96\% for affordance localization, 62.5\% for object identification, and 72\% for grasping. Videos are available on the project website: https://sites.google.com/view/rl-affcorrs-grasp.},
  url       = {https://doi.org/10.1109/SII58957.2024.10417178},
  note      = {This version is the author accepted manuscript. For information on re-use, please refer to the publisher's terms and conditions.}
}