@article{Hoffman:2009b,
  author  = {Hoffman, Matthew and de Freitas, Nando and Doucet, Arnaud and Peters, Jan},
  title   = {An Expectation Maximization Algorithm for Continuous {Markov} Decision Processes with Arbitrary Reward},
  journal = {Journal of Machine Learning Research - Proceedings Track for Artificial Intelligence and Statistics (AISTATS)},
  volume  = {5},
  pages   = {232--239},
  year    = {2009},
  url     = {http://jmlr.org/proceedings/papers/v5/hoffman09a.html},
}