@inproceedings{discovery10193055,
  author        = {Champagnie, Kale and Chen, Boli and Arvin, Farshad and Hu, Junyan},
  title         = {Online Multi-Robot Coverage Path Planning in Dynamic Environments Through Pheromone-Based Reinforcement Learning},
  booktitle     = {Proceedings of the 2024 IEEE 20th International Conference on Automation Science and Engineering (CASE)},
  series        = {IEEE International Conference on Automation Science and Engineering (CASE)},
  volume        = {20},
  pages         = {1000--1005},
  year          = {2024},
  month         = oct,
  publisher     = {Institute of Electrical and Electronics Engineers (IEEE)},
  address       = {Bari, Italy},
  issn          = {2161-8089},
  doi           = {10.1109/CASE59546.2024.10711550},
  note          = {This version is the author accepted manuscript. For information on re-use, please refer to the publisher's terms and conditions.},
  abstract      = {Two promising approaches to coverage path planning are reward-based and pheromone-based methods. Reward-based methods allow heuristics to be learned automatically, often yielding a superior performance over hand-crafted rules. On the other hand, pheromone-based methods consistently demonstrate superior generalization and adaptation abilities when placed in unfamiliar environments. To obtain the best of both worlds, we introduce Greedy Entropy Maximization (GEM), a hybrid approach that aims to maximize the entropy of a pheromone deposited by a swarm of homogeneous ant-like agents. We begin by establishing a sharp upper-bound on achievable entropy and show that this corresponds to optimal dynamic coverage path planning. Next, we demonstrate that GEM closely approaches this upper-bound despite depriving agents of basic necessities such as memory and explicit communication. Finally, we show that GEM can be executed asynchronously in constant-time, enabling it to scale arbitrarily.},
  keywords      = {Computer aided software engineering, Automation, Neural networks, Reinforcement learning, Path planning, Entropy},
  internal-note = {address holds the conference venue (Bari), not the publisher city; some styles expect publisher city here -- verify against target style},
}