@inproceedings{discovery10132236,
  title     = {Prioritized Level Replay},
  author    = {Jiang, Minqi and Grefenstette, Edward and Rockt{\"a}schel, Tim},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning},
  editor    = {Meila, Marina and Zhang, Tong},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {4940--4950},
  publisher = {PMLR},
  address   = {Online Only},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/jiang21b.html},
  abstract  = {Environments with procedurally generated content serve as important benchmarks for testing systematic generalization in deep reinforcement learning. In this setting, each level is an algorithmically created environment instance with a unique configuration of its factors of variation. Training on a prespecified subset of levels allows for testing generalization to unseen levels. What can be learned from a level depends on the current policy, yet prior work defaults to uniform sampling of training levels independently of the policy. We introduce Prioritized Level Replay (PLR), a general framework for selectively sampling the next training level by prioritizing those with higher estimated learning potential when revisited in the future. We show that TD-errors effectively estimate a level's future learning potential and, when used to guide the sampling procedure, induce an emergent curriculum of increasingly difficult levels. By adapting the sampling of training levels, PLR significantly improves sample efficiency and generalization on the Procgen Benchmark (matching the previous state-of-the-art in test return) and readily combines with other methods. Combined with the previous leading method, PLR raises the state-of-the-art to over 76\% improvement in test return relative to standard RL baselines.}
}