[home] [search] BibTeX of Marcus Hutter [contact] [up] 

Publications: 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007, 2006, 2005, 2004, 2003, 2002, 2001, 2000, older.

%-------------Publications-of-Marcus-Hutter-2024--------------%

% Talk on ASI safety in the Universal AI (AIXI) framework; slides and video are linked below.
% Acronyms in the title are brace-protected so sentence-casing styles keep them upper-case.
@misc{Hutter:24aixisafe,
  author =       "Marcus Hutter",
  title =        "{ASI} Safety via {AIXI}",
  _month =        nov,
  year =         "2024",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixisafe",
  slides =       "http://www.hutter1.net/publ/saixisafe.pdf",
  video =        "http://youtu.be/qHqv3GvWBTM",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  abstract =     "Universal AI is a mathematical theory of the ultimate Artificial
                  Super-Intelligence (ASI). More precisely, AIXI is an elegant
                  parameter-free theory of an optimal reinforcement learning agent
                  embedded in an arbitrary unknown environment that possesses
                  essentially all aspects of rational intelligence. One would
                  expect that very intelligent agents would take actions to
                  further their goals, posing a potential hazard unless those
                  goals are aligned with that of humans. AIXI and variations are
                  ideally suited for investigating questions around such
                  ASI-safety issues with mathematical rigor. After a brief
                  introduction to AIXI, I present these alignment and other safety
                  problems and some solutions in the context of Universal AI.
                  While the talk/slides are informal, all claims are backed up by
                  rigorous math.",
}
% Journal article: the doi and url identify Physica D as the venue, so the journal
% field names it; the arXiv preprint is linked via the arxiv field instead.
% The article number in pages is taken from the DOI suffix.
@article{Hutter:24kernelait,
  title        = {Bridging Algorithmic Information Theory and Machine Learning: A New Approach to Kernel Learning},
  author       = {Hamzi, Boumediene and Hutter, Marcus and Owhadi, Houman},
  year         = 2024,
  journal      = {Physica D: Nonlinear Phenomena},
  pages        = {134153},
  doi          = {10.1016/j.physd.2024.134153},
  url          = {http://www.sciencedirect.com/journal/physica-d-nonlinear-phenomena/about/call-for-papers},
  _month       = aug,
  bibtex       = {http://www.hutter1.net/official/bib.htm#kernelait},
  arxiv        = {http://arxiv.org/abs/2311.12624}
}
% ICML 2024 paper; booktitle follows the "Proc. Nth ... ({ACRONYM'YY})" form used by
% the other ICML entries in this file.
@inproceedings{Hutter:24trainsol,
  title        = {Learning Universal Predictors},
  author       = {Grau-Moya, Jordi and Genewein, Tim and Hutter, Marcus and Orseau, Laurent and Deletang, Gregoire and Catt, Elliot and Ruoss, Anian and Wenliang, Li Kevin and Mattern, Christopher and Aitchison, Matthew and Veness, Joel},
  year         = 2024,
  booktitle    = {Proc. 41st International Conference on Machine Learning ({ICML'24})},
  url          = {http://openreview.net/forum?id=B1ajnQyZgK},
  _month       = jul,
  bibtex       = {http://www.hutter1.net/official/bib.htm#trainsol},
  arxiv        = {http://arxiv.org/abs/2401.14953},
  press        = {http://medium.com/syncedreview/neural-networks-on-the-brink-of-universal-prediction-with-deepminds-cutting-edge-approach-2de9af5b4e3f}
}
% ICML 2024 paper. "Bellman" is brace-protected so sentence-casing styles keep the
% capital; booktitle follows the form used by the other ICML entries in this file.
@inproceedings{Hutter:24distrmerl,
  title        = {Distributional {Bellman} Operators over Mean Embeddings},
  author       = {Wenliang, Li Kevin and Deletang, Gregoire and Aitchison, Matthew and Hutter, Marcus and Ruoss, Anian and Gretton, Arthur and Rowland, Mark},
  year         = 2024,
  booktitle    = {Proc. 41st International Conference on Machine Learning ({ICML'24})},
  url          = {http://openreview.net/forum?id=lGOPBl9tfb},
  _month       = jul,
  bibtex       = {http://www.hutter1.net/official/bib.htm#distrmerl},
  arxiv        = {http://arxiv.org/abs/2312.07358}
}
% Book entry for the 2024 UAI textbook (ISBNs, doi and companion links below).
% Every field, including doi, must end with a comma or the entry does not parse.
@Book{Hutter:24uaibook2,
  author =       "Marcus Hutter and David Quarel and Elliot Catt",
  title =        "An Introduction to Universal Artificial Intelligence",
  series =       "Chapman \& Hall/CRC Artificial Intelligence and Robotics Series",
  publisher =    "Taylor and Francis",
  _month =       may,
  year =         "2024",
  isbn =         "Paperback:9781032607023, Hardcover:9781032607153, eBook:9781003460299",
  pages =        "500",
  bibtex =       "http://www.hutter1.net/official/bib.htm#uaibook2",
  doi =          "10.1201/9781003460299",
  _note =         "500+ pages, http://www.hutter1.net/ai/uaibook2.htm",
  url =          "http://www.hutter1.net/ai/uaibook2.htm",
  http =         "http://www.routledge.com/An-Introduction-to-Universal-Artificial-Intelligence/Hutter-Catt-Quarel/p/book/9781032607023",
  slides =       "http://www.hutter1.net/ai/suaibook.pdf",
  video =        "http://cartesiancafe.podbean.com/e/marcus-hutter-universal-artificial-intelligence-and-solomonoff-induction/",
  keywords =     "Artificial general intelligence; algorithmic information theory;
                  Bayes mixture distributions; universal sequence prediction;
                  context tree weighting; rational agents; sequential decision theory;
                  universal intelligent agents; reinforcement learning;
                  games and multi-agent systems; approximation/implementation/application;
                  AGI-safety; philosophy of AI.",
  abstract =     "`An Introduction to Universal Artificial Intelligence'
                  provides the formal underpinning of what it means for an agent
                  to act intelligently in an unknown environment.
                  First presented in `Universal Algorithmic Intelligence' (Hutter, 2000),
                  UAI offers a framework in which virtually all AI problems can be formulated,
                  and a theory of how to solve them.
                  UAI unifies ideas from sequential decision theory,
                  Bayesian inference, and algorithmic information theory to construct AIXI,
                  an optimal reinforcement learning agent
                  that learns to act optimally in unknown environments.
                  AIXI is the theoretical gold standard for intelligent behavior.
                      The book covers both the theoretical and practical aspects of UAI.
                  Bayesian updating can be done efficiently with context tree weighting,
                  and planning can be approximated by sampling with Monte Carlo tree search.
                  It provides algorithms for the reader to implement,
                  and experimental results to compare against.
                  These algorithms are used to approximate AIXI.
                  The book ends with a philosophical discussion of Artificial General Intelligence:
                  Can super-intelligent agents even be constructed?
                  Is it inevitable that they will be constructed,
                  and what are the potential consequences?
                      This text is suitable for late undergraduate students.
                  It provides an extensive chapter to fill in the required
                  mathematics, probability, information,
                  and computability theory background.",
  support =      "ARC grant DP150104590",
  for =          "010404(20%),080101(20%),080198(20%),080299(10%),080401(30%)",
}
% ICLR'24 paper; OpenReview, arXiv, local PDF/slides and code links are given below.
@inproceedings{Hutter:24lmiscompr,
  author       = {Gregoire Deletang and Anian Ruoss and Paul-Ambroise Duquenne and Elliot Catt and Tim Genewein and Christopher Mattern and Jordi Grau-Moya and Li Kevin Wenliang and Matthew Aitchison and Laurent Orseau and Marcus Hutter and Joel Veness},
  title        = {Language Modeling Is Compression},
  booktitle    = {Proc. 12th International Conference on Learning Representations ({ICLR'24})},
  _number      = {DM:rh/P39768},
  address      = {Vienna, Austria},
  _month       = may,
  year         = {2024},
  bibtex       = {http://www.hutter1.net/official/bib.htm#lmiscompr},
  http         = {http://openreview.net/forum?id=jznbgiynus},
  arxiv        = {http://arxiv.org/abs/2309.10668},
  pdf          = {http://www.hutter1.net/publ/lmiscompr.pdf},
  slides       = {http://www.hutter1.net/publ/slmiscompr.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#infoth},
  press        = {http://twitter.com/search?q=%22language%20modeling%20is%20compression%22},
  code         = {http://www.hutter1.net/publ/lmiscompr.cpp},
  keywords     = {lossless compression; arithmetic coding; language models; scaling laws; in-context learning},
  abstract     = {It has long been established that predictive models can be transformed
                  into lossless compressors and vice versa. Incidentally, in recent years,
                  the machine learning community has focused on training increasingly large
                  and powerful self-supervised (language) models.
                  Since these large language models exhibit impressive predictive capabilities,
                  they are well-positioned to be strong compressors.
                  In this work, we advocate for viewing the prediction problem
                  through the lens of compression and evaluate the compression capabilities of large (foundation) models.
                  We show that large language models are powerful general-purpose predictors
                  and that the compression viewpoint provides novel insights into scaling laws,
                  tokenization, and in-context learning. For example, Chinchilla 70B,
                  while trained primarily on text, compresses ImageNet patches to 43.4\%
                  and LibriSpeech samples to 16.4\% of their raw size,
                  beating domain-specific compressors like PNG (58.5\%) or FLAC (30.3\%),
                  respectively. Finally, we show that the prediction-compression equivalence
                  allows us to use any compressor (like gzip) to build a conditional generative model.},
  znote        = {received over a million Twitter views and was there the ``top ML paper of week'':
                  http://twitter.com/search?q=%22language%20modeling%20is%20compression%22
                  Acceptance rate: 2251/7262=31\%},
}
% Technical report; institution spelling matches the domain in the url field.
@TechReport{Hutter:24imisafe,
  author =       "Michael Cohen and Marcus Hutter",
  title =        "Imitation Learning is Probably Existentially Safe",
  institution =  "Open Philanthropy",
  _month =        apr,
  year =         "2024",
  bibtex =       "http://www.hutter1.net/official/bib.htm#imisafe",
  url =          "http://www.openphilanthropy.org/wp-content/uploads/Imitation_Learning_Safe_ready.pdf",
}
% OpenReview submission; the booktitle and note fields are carried under
% underscore-prefixed names, which BibTeX styles do not recognise.
% Author name "Grau-Moya" is spelled as in the other entries of this file.
@inproceedings{Hutter:24cnctrafo,
  title        = {Generative Reinforcement Learning with Transformers},
  author       = {Gregoire Deletang and Anian Ruoss and Li Kevin Wenliang and Elliot Catt and Tim Genewein and Jordi Grau-Moya and Marcus Hutter and Joel Veness},
  year         = 2024,
  url          = {http://openreview.net/forum?id=6qtDu7hVPF},
  _booktitle   = {Submitted to The Twelfth International Conference on Learning Representations},
  _month       = feb,
  bibtex       = {http://www.hutter1.net/official/bib.htm#cnctrafo},
  _note        = {under review}
}
% OpenReview submission; the booktitle and note fields are carried under
% underscore-prefixed names, which BibTeX styles do not recognise.
@inproceedings{Hutter:24truncvalf,
  title        = {Policy Gradient without Bootstrapping via Truncated Value Learning},
  author       = {Matthew Aitchison and Penny Sweetser and Gregoire Deletang and Marcus Hutter},
  year         = 2024,
  url          = {http://openreview.net/forum?id=nBYDP46s5N},
  _booktitle   = {Submitted to The Twelfth International Conference on Learning Representations},
  _month       = feb,
  bibtex       = {http://www.hutter1.net/official/bib.htm#truncvalf},
  _note        = {under review}
}
% AAAI proceedings paper. The doi field holds the bare DOI (no resolver prefix),
% since styles that print DOIs prepend the resolver themselves.
@inproceedings{Hutter:24hedgeaixi,
  title        = {Dynamic Knowledge Injection for {AIXI} Agents},
  author       = {Yang-Zhao, Samuel and Ng, Kee Siong and Hutter, Marcus},
  year         = 2024,
  booktitle    = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume       = 38,
  number       = 15,
  pages        = {16388--16397},
  doi          = {10.1609/aaai.v38i15.29575},
  _month       = feb,
  bibtex       = {http://www.hutter1.net/official/bib.htm#hedgeaixi},
  znote        = {Accept rate: 2342/9862=23.75\%}
}

%-------------Publications-of-Marcus-Hutter-2023--------------%

% NeurIPS 2023 workshop paper; OpenReview, PDF, LaTeX, slides, poster and video links below.
@InProceedings{Hutter:23onadallm,
  title =        "Revisiting Dynamic Evaluation: Online Adaptation for Large Language Models",
  author=        "Amal Rannen-Triki and Jorg Bornschein and Razvan Pascanu and Alexandre Galashov and Michalis Titsias and Marcus Hutter and Andras Gyorgy and Yee Whye Teh",
  booktitle =    "NeurIPS Workshop on Distribution Shifts: New Frontiers with Foundation Models",
  _number =      "DM:rh/P49871",
  address =      "New Orleans, USA",
  _month =        dec,
  year =         "2023",
  bibtex =       "http://www.hutter1.net/official/bib.htm#onadallm",
  http =         "http://openreview.net/forum?id=iRz8qi7QB8",
  url =          "http://nips.cc/virtual/2023/80505",
  pdf =          "http://www.hutter1.net/publ/onadallm.pdf",
  latex =        "http://www.hutter1.net/publ/onadallm.tex",
  slides =       "http://www.hutter1.net/publ/sonadallm.pdf",
  poster =       "http://www.hutter1.net/publ/ponadallm.pdf",
  video =        "http://nips.cc/virtual/2023/80505",
  project =      "http://www.hutter1.net/official/projects.htm#nn",
  keywords =     "online learning; large language models; dynamic evaluation; context extension",
  abstract =     "We consider the problem of online finetuning the parameters of a
                  language model at test time, also known as dynamic evaluation.
                  While it is generally known that this approach improves the
                  overall predictive performance, especially when considering
                  distributional shift between training and evaluation data, we
                  here emphasize the perspective that online-adaptation turns
                  parameters into temporally changing states and provides a form
                  of context-length extension with memory in weights, more in line
                  with the concept of memory in neuroscience. We pay particular
                  attention to the speed of adaptation (in terms of sample
                  efficiency), sensitivity to overall distributional drift, and
                  computational overhead for performing gradient computation and
                  parameter updates. Our empirical study provides insights on when
                  online adaptation is particularly interesting. We highlight that
                  with online adaptation the conceptual distinction between
                  in-context learning and finetuning blurs: Both are methods to
                  condition the model on previously observed tokens.",
}
% NeurIPS'23 paper introducing the Self-AIXI agent; OpenReview, PDF and poster links below.
@inproceedings{Hutter:23selfaixi,
  author       = {Elliot Catt and Jordi Grau-Moya and Marcus Hutter and Matthew Aitchison and Tim Genewein and Gregoire Deletang and Li Kevin Wenliang and Joel Veness},
  title        = {Self-Predictive Universal {AI}},
  booktitle    = {37th Conf. on Neural Information Processing Systems ({NeurIPS'23})},
  pages        = {1--18},
  _number      = {DM:rh/P34416},
  _editor      = {},
  address      = {New Orleans, USA},
  _month       = dec,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#selfaixi},
  http         = {http://openreview.net/forum?id=psXVkKO9No},
  pdf          = {http://www.hutter1.net/publ/selfaixi.pdf},
  poster       = {http://www.hutter1.net/publ/pselfaixi.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  keywords     = {reinforcement learning; universal; self-prediction; Bayes; planning; on-policy; intelligence; AIXI},
  abstract     = {Reinforcement Learning (RL) algorithms typically utilize
                  learning and/or planning techniques to derive effective
                  policies. Integrating both approaches has proven to be highly
                  successful in addressing complex sequential decision-making
                  challenges, as evidenced by algorithms such as AlphaZero and
                  MuZero, which consolidate the planning process into a parametric
                  search-policy. AIXI, the universal Bayes-optimal agent, leverages
                  planning through comprehensive search as its primary means to
                  find an optimal policy. Here we define an alternative universal
                  Bayesian agent, which we call Self-AIXI, that on the contrary to
                  AIXI, maximally exploits learning to obtain good policies. It
                  does so by self-predicting its own stream of action data, which
                  is generated, similarly to other TD(0) agents, by taking an
                  action maximization step over the current on-policy (universal
                  mixture-policy) Q-value estimates. We prove that Self-AIXI
                  converges to AIXI, and inherits a series of properties like
                  maximal Legg-Hutter intelligence and the self-optimizing
                  property.},
  support      = {ARC grant DP150104590},
  znote        = {Acceptance rate: 3218/12343 = 26\%},
}
% arXiv preprint; the journal field carries the arXiv link.
@article{Hutter:23lscm,
  author       = {Laurent Orseau and Marcus Hutter},
  title        = {Line Search for Convex Minimization},
  journal      = {\href{http://arxiv.org/abs/2307.16560}{arXiv:2307.16560}},
  pages        = {1--19},
  _month       = jul,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#lscm},
  pdf          = {http://www.hutter1.net/publ/lscm.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#opt},
  keywords     = {optimization; quasiconvex; bisection; secant; gap; guarantees},
  abstract     = {We propose two algorithms: $\Delta$-Bisection is a variant of
                  bisection search that uses (sub)gradient information and
                  convexity to speed up convergence, while $\Delta$-Secant is a
                  variant of golden-section search and uses only function queries.
                  Both algorithms are based on a refined definition of the
                  \emph{optimality region} $\Delta$ containing the minimum point,
                  for general convex functions. While bisection search reduces the
                  $x$ interval by a factor 2 at every iteration,
                  $\Delta$-Bisection reduces the (sometimes much) smaller
                  $x^*$-gap $\Delta^x$ (the $x$ coordinates of $\Delta$) by
                  \emph{at least} a factor 2 at every iteration. Similarly,
                  $\Delta$-Secant also reduces the $x^*$-gap by at least a factor
                  2 every second function query. Moreover, and possibly more
                  importantly, the $y^*$-gap $\Delta^y$ (the $y$ coordinates of
                  $\Delta$) also provides a refined stopping criterion, which can
                  also be used with other algorithms. Experiments on a few convex
                  functions confirm that our algorithms are always faster than
                  their quasiconvex counterparts, often by more than a factor 2.
                  We further design a \emph{quasi-exact} line search algorithm
                  based on $\Delta$-Secant. It can be used with gradient descent
                  as a replacement for backtracking line search. We also provide
                  convergence guarantees.},
}
% IJCAI'23 paper (distinguished paper award, see note); code, slides and poster links below.
% Every field, including doi, must end with a comma or the entry does not parse.
% "Levin" is brace-protected so sentence-casing styles keep the capital.
@InProceedings{Hutter:23ltscm,
  author =       "Laurent Orseau and Marcus Hutter and Levi HS Lelis",
  title =        "{Levin} Tree Search with Context Models",
  booktitle =    "Proc. 32nd International Joint Conference on Artificial Intelligence ({IJCAI'23})",
  _number =      "DM:rh/P21589",
  pages =        "5622--5630",
  _editor =       "",
  address =      "Macao, China",
  _month =        aug,
  year =         "2023",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ltscm",
  arxiv =        "\href{http://arxiv.org/abs/2305.16945}{arXiv:2305.16945}",
  pdf =          "http://www.hutter1.net/publ/ltscm.pdf",
  slides =       "http://www.hutter1.net/publ/sltscm.pdf",
  poster =       "http://www.hutter1.net/publ/pltscm.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  code =         "http://github.com/google-deepmind/levintreesearch_cm",
  doi =          "10.24963/ijcai.2023/624",
  keywords =     "universal search; context; tree; Rubik's cube; Sokoban; convexity",
  abstract =     "Levin Tree Search (LTS) is a search algorithm that makes use of
                  a policy (a probability distribution over actions) and comes
                  with a theoretical guarantee on the number of expansions before
                  reaching a goal node, depending on the quality of the policy.
                  This guarantee can be used as a loss function, which we call the
                  LTS loss, to optimize neural networks representing the policy
                  (LTS+NN). In this work we show that the neural network can be
                  substituted with parameterized context models originating from
                  the online compression literature (LTS+CM). We show that the LTS
                  loss is convex under this new model, which allows for using
                  standard convex optimization tools, and obtain convergence
                  guarantees to the optimal parameters in an online setting for a
                  given set of solution trajectories -- guarantees that cannot be
                  provided for neural networks. The new LTS+CM algorithm compares
                  favorably against LTS+NN on several benchmarks: Sokoban
                  (Boxoban), The Witness, and the 24-Sliding Tile puzzle (STP).
                  The difference is particularly large on STP, where LTS+NN fails
                  to solve most of the test instances while LTS+CM solves each
                  test instance in a fraction of a second. Furthermore, we show
                  that LTS+CM is able to learn a policy that solves the Rubik's
                  cube in only a few hundred expansions, which considerably
                  improves upon previous machine learning techniques.",
  note =         "\href{http://ijcai-23.org/distinguished-paper-awards/}{Distinguished paper award}",
}
% ICML'23 paper (PMLR volume 202); proceedings, OpenReview, arXiv, slides and poster links below.
% The benchmark's proper name in the title is brace-protected against sentence-casing styles.
@InProceedings{Hutter:23atari5,
  author =       "Matthew Aitchison and Penny Sweetser and Marcus Hutter",
  title =        "Atari-5: Distilling the {Arcade Learning Environment} down to Five Games",
  booktitle =    "Proc. 40th International Conference on Machine Learning ({ICML'23})",
  _number =      "DM:rh/P?????",
  volume =       "202",
  pages =        "421--438",
  _editor =      "Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett",
  publisher =    "PMLR",
  address =      "Hawaii, USA",
  _month =        jul,
  year =         "2023",
  bibtex =       "http://www.hutter1.net/official/bib.htm#atari5",
  http =         "http://proceedings.mlr.press/v202/aitchison23a.html",
  url =          "http://openreview.net/forum?id=xRDHjO0YBo",
  arxiv =        "\href{http://arxiv.org/abs/2210.02019}{arXiv:2210.02019}",
  pdf =          "http://proceedings.mlr.press/v202/aitchison23a/aitchison23a.pdf",
  slides =       "http://www.hutter1.net/publ/satari5.pdf",
  poster =       "http://www.hutter1.net/publ/patari5.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ai",
  code =         "http://www.hutter1.net/publ/atari5.cpp",
  keywords =     "Atari; ALE; benchmark; RL; AGI; efficiency; evaluation",
  abstract =     "The Arcade Learning Environment (ALE) has become an essential
                  benchmark for assessing the performance of reinforcement
                  learning algorithms. However, the computational cost of
                  generating results on the entire 57-game dataset limits ALE's
                  use and makes the reproducibility of many results infeasible. We
                  propose a novel solution to this problem in the form of a
                  principled methodology for selecting small but representative
                  subsets of environments within a benchmark suite. We applied our
                  method to identify a subset of five ALE games, we call Atari-5,
                  which produces 57-game median score estimates within 10\% of
                  their true values. Extending the subset to 10-games recovers
                  80\% of the variance for log-scores for all games within the
                  57-game set. We show this level of compression is possible due to
                  a high degree of correlation between many of the games in ALE.",
  znote =        "Acceptance rate: 1827/6538 = 28\%",
}
% ICML'23 paper (PMLR volume 202); proceedings, OpenReview, arXiv, slides and poster links below.
@inproceedings{Hutter:23nnptw,
  author       = {Tim Genewein and Gregoire Deletang and Anian Ruoss and Li Kevin Wenliang and Elliot Catt and Vincent Dutordoir and Jordi Grau-Moya and Laurent Orseau and Marcus Hutter and Joel Veness},
  title        = {Memory-Based Meta-Learning on Non-Stationary Distributions},
  booktitle    = {Proc. 40th International Conference on Machine Learning ({ICML'23})},
  _number      = {DM:rh/P23814},
  volume       = {202},
  pages        = {11173--11195},
  _editor      = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  publisher    = {PMLR},
  address      = {Hawaii, USA},
  _month       = jul,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#nnptw},
  http         = {http://proceedings.mlr.press/v202/genewein23a.html},
  url          = {http://openreview.net/forum?id=gyHGzyIuEJ},
  arxiv        = {\href{http://arxiv.org/abs/2302.03067}{arXiv:2302.03067}},
  pdf          = {http://proceedings.mlr.press/v202/genewein23a/genewein23a.pdf},
  slides       = {http://www.hutter1.net/publ/snnptw.pdf},
  poster       = {http://www.hutter1.net/publ/pnnptw.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  keywords     = {Partition Tree Weighting; in-context learning; non-stationary; RNN; LSTM; Transformer; Bayes-optimal},
  abstract     = {Memory-based meta-learning is a technique for approximating
                  Bayes-optimal predictors. Under fairly general conditions,
                  minimizing sequential prediction error, measured by the log
                  loss, leads to implicit meta-learning. The goal of this work is
                  to investigate how far this interpretation can be realized by
                  current sequence prediction models and training regimes. The
                  focus is on piecewise stationary sources with unobserved
                  switching-points, which arguably capture an important
                  characteristic of natural language and action-observation
                  sequences in partially observable environments. We show that
                  various types of memory-based neural models, including
                  Transformers, LSTMs, and RNNs can learn to accurately
                  approximate known Bayes-optimal algorithms and behave as if
                  performing Bayesian inference over the latent switching-points
                  and the latent parameters governing the data distribution within
                  each segment.},
  znote        = {Acceptance rate: 1827/6538 = 28\%},
}
% arXiv preprint on the POTMMCP planning method; the journal field carries the arXiv link.
% "Monte-Carlo" is brace-protected so sentence-casing styles keep the capitals.
@article{Hutter:23potmmcp,
  author =       "Jonathon Schwartz and Hanna Kurniawati and Marcus Hutter",
  title =        "Combining a Meta-Policy and {Monte-Carlo} Planning for Scalable Type-Based Reasoning in Partially Observable Environments",
  journal =      "\href{http://arxiv.org/abs/2306.06067}{arXiv:2306.06067}",
  _number =       "DM:rh/P29163",
  pages =        "1--24",
  _month =        jun,
  year =         "2023",
  bibtex =       "http://www.hutter1.net/official/bib.htm#potmmcp",
  pdf =          "http://www.hutter1.net/publ/potmmcp.pdf",
  slides =       "http://www.hutter1.net/publ/spotmmcp.pdf",
  poster =       "http://dl.acm.org/doi/10.5555/3545946.3598932",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  keywords =     "Multi-Agent; POSG; Type-Based Reasoning; Planning under Uncertainty; MCTS",
  abstract =     "The design of autonomous agents that can interact effectively
                  with other agents without prior coordination is a core problem
                  in multi-agent systems. Type-based reasoning methods achieve
                  this by maintaining a belief over a set of potential behaviours
                  for the other agents. However, current methods are limited in
                  that they assume full observability of the state and actions of
                  the other agent or do not scale efficiently to larger problems
                  with longer planning horizons. Addressing these limitations, we
                  propose Partially Observable Type-based Meta Monte-Carlo
                  Planning (POTMMCP) - an online Monte-Carlo Tree Search based
                  planning method for type-based reasoning in large partially
                  observable environments. POTMMCP incorporates a novel
                  meta-policy for guiding search and evaluating beliefs, allowing
                  it to search more effectively to longer horizons using less
                  planning time. We show that our method converges to the optimal
                  solution in the limit and empirically demonstrate that it
                  effectively adapts online to diverse sets of other agents across
                  a range of environments. Comparisons with the state-of-the-art
                  method on problems with up to $10^{14}$ states and $10^8$
                  observations indicate that POTMMCP is able to compute better
                  solutions significantly faster.",
  overleaf =     "http://www.overleaf.com/project/63c934e3aecfa447d7c14968",
}
% ICLR'23 paper; OpenReview, arXiv, local PDF and slides links are given below.
@inproceedings{Hutter:23nnchomsky,
  author       = {Gregoire Deletang and Anian Ruoss and Jordi Grau-Moya and Tim Genewein and Li Kevin Wenliang and Elliot Catt and Chris Cundy and Marcus Hutter and Shane Legg and Joel Veness and Pedro A Ortega},
  title        = {Neural Networks and the {Chomsky} Hierarchy},
  booktitle    = {Proc. 11th International Conference on Learning Representations ({ICLR'23})},
  _number      = {DM:rh/P5637},
  address      = {Kigali, Rwanda},
  _month       = may,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#nnchomsky},
  http         = {http://openreview.net/forum?id=WbxHAzkeQcn},
  arxiv        = {http://arxiv.org/abs/2207.02098},
  pdf          = {http://www.hutter1.net/publ/nnchomsky.pdf},
  slides       = {http://www.hutter1.net/publ/snnchomsky.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  press        = {Trended SuperHot in deeplearn.org in Jul'2022.
                  23Jan23 Number 9 on hacker news http://news.ycombinator.com/item?id=34485631},
  keywords     = {length generalization; memory-augmented neural networks; recurrent neural networks},
  abstract     = {Reliable generalization lies at the heart of safe ML and AI.
                  However, understanding when and how neural networks generalize
                  remains one of the most important unsolved problems in the
                  field. In this work, we conduct an extensive empirical study
                  (20'910 models, 15 tasks) to investigate whether insights from
                  the theory of computation can predict the limits of neural
                  network generalization in practice. We demonstrate that grouping
                  tasks according to the Chomsky hierarchy allows us to forecast
                  whether certain architectures will be able to generalize to
                  out-of-distribution inputs. This includes negative results where
                  even extensive amounts of data and training time never lead to
                  any non-trivial generalization, despite models having sufficient
                  capacity to fit the training data perfectly. Our results show
                  that, for our subset of tasks, RNNs and Transformers fail to
                  generalize on non-regular tasks, LSTMs can solve regular and
                  counter-language tasks, and only networks augmented with
                  structured memory (such as a stack or memory tape) can
                  successfully generalize on context-free and context-sensitive
                  tasks.},
  znote        = {(Spotlight) acceptance rate: (400)1590/5000 = (8\%)32\%},
}
@inproceedings{Hutter:23agmix,
  author       = {Alexander, Samuel Allen and Quarel, David and Du, Len and Hutter, Marcus},
  title        = {Universal Agent Mixtures and the Geometry of Intelligence},
  booktitle    = {International Conference on Artificial Intelligence and Statistics},
  pages        = {4231--4246},
  organization = {PMLR},
  _month       = apr,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#agmix},
  url          = {http://proceedings.mlr.press/v206/alexander23a.html},
  arxiv        = {http://arxiv.org/abs/2302.06083},
  video        = {http://youtu.be/yfzXvkZx2pw},
}
@misc{Hutter:23glcbpatent,
  author       = {Sezener, Eren and Veness, Joel William and Hutter, Marcus and Wang, Jianan and Budden, David},
  title        = {Gated linear contextual bandits},
  publisher    = {Google Patents},
  note         = {US Patent App. 17/766,854},
  _month       = mar,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#glcbpatent},
}
@inproceedings{Hutter:23switchmdl,
  author       = {Yazhe Li and Jorg Bornschein and Marcus Hutter},
  title        = {Evaluating Representations with Readout Model Switching},
  booktitle    = {11th International Conference on Learning Representations},
  _month       = mar,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#switchmdl},
  url          = {http://openreview.net/forum?id=Fsd-6ax4T1m},
}
@inproceedings{Hutter:23preqnn,
  author       = {Bornschein, Jorg and Li, Yazhe and Hutter, Marcus},
  title        = {Sequential Learning of Neural Networks for Prequential {MDL}},
  booktitle    = {11th International Conference on Learning Representations},
  _month       = mar,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#preqnn},
  url          = {http://openreview.net/forum?id=h0MfjMHHNr},
}
@article{Hutter:23uclip,
  author       = {Elesedy, Bryn and Hutter, Marcus},
  title        = {U-Clip: On-Average Unbiased Stochastic Gradient Clipping},
  journal      = {\href{http://arxiv.org/abs/2302.02971}{arXiv:2302.02971}},
  _month       = feb,
  year         = {2023},
  bibtex       = {http://www.hutter1.net/official/bib.htm#uclip},
}

%-------------Publications-of-Marcus-Hutter-2022--------------%

@article{Hutter:22ncollapsex,
  author       = {Galanti, Tomer and Gyorgy, Andras and Hutter, Marcus},
  title        = {Generalization bounds for transfer learning with pretrained classifiers},
  journal      = {\href{http://arxiv.org/abs/2212.12532}{arXiv:2212.12532}},
  _month       = dec,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ncollapsex},
}
@misc{Hutter:22agiwirehs,
  author       = {Michael K. Cohen and Marcus Hutter},
  title        = {The Danger of Advanced Artificial Intelligence Controlling Its Own Feedback},
  journal      = {The Conversation},
  howpublished = {http://theconversation.com/the-danger-of-advanced-artificial-intelligence-controlling-its-own-feedback-190445},
  _month       = oct,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#agiwirehs},
}
@article{Hutter:22bbayes,
  author       = {Grau-Moya, Jordi and Deletang, Gregoire and Kunesch, Markus and Genewein, Tim and Catt, Elliot and Li, Kevin and Ruoss, Anian and Cundy, Chris and Veness, Joel and Wang, Jane and others},
  title        = {Beyond {Bayes}-optimality: meta-learning what you know you don't know},
  journal      = {\href{http://arxiv.org/abs/2209.15618}{arXiv:2209.15618}},
  _month       = sep,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#bbayes},
  arxiv        = {http://arxiv.org/abs/2209.15618},
  znote        = {Rank 1 at deeplearn.org, SuperHot for 2+ weeks, Front page of HackerNews: http://news.ycombinator.com/item?id=32163324 http://syncedreview.com/2022/07/25/deepmind-paper-provides-a-mathematically-precise-overview-of-transformer-architectures-and-algorithms/},
}
@article{Hutter:22agiwireh,
  title        = {Advanced artificial agents intervene in the provision of reward},
  author       = {Cohen, Michael and Hutter, Marcus and Osborne, Michael},
  year         = 2022,
  journal      = {AI Magazine},
  volume       = 43,
  number       = 3,
  pages        = {282--293},
  doi          = {10.1002/aaai.12064},
  _month       = aug,
  bibtex       = {http://www.hutter1.net/official/bib.htm#agiwireh}
}
@inproceedings{Hutter:22binaixi,
  author       = {Elliot Catt and Marcus Hutter and Joel Veness},
  title        = {On Reward Binarisation and {Bayesian} Agents},
  booktitle    = {15th European Workshop on Reinforcement Learning ({EWRL-15})},
  note         = {\url{http://ewrl.files.wordpress.com/2022/09/ewrl22_submission.pdf}},
  _month       = sep,
  year         = {2022},
  url          = {http://ewrl.files.wordpress.com/2022/09/ewrl22_submission.pdf},
  pdf          = {http://www.hutter1.net/publ/binaixi.pdf},
  keywords     = {},
}
@InProceedings{Hutter:22compcon,
  author =       "Elliot Catt and Marcus Hutter and Joel Veness",
  title =        "Reinforcement Learning with Information-Theoretic Actuation",
  booktitle =    "Proc. 15th International Conference on Artificial General Intelligence ({AGI'22})",
  series =       "LNCS",
  volume =       "13539",
  pages =        "188--198",
  _editor =       "Ben Goertzel and Matt Iklé and Alexey Potapov and Denis Ponomaryov",
  publisher =    "Springer",
  address =      "Seattle, WA, USA",
  _month =        aug,
  year =         "2022",
  bibtex =       "http://www.hutter1.net/official/bib.htm#compcon",
  url =          "http://arxiv.org/abs/2109.15147",
  pdf =          "http://www.hutter1.net/publ/compcon.pdf",
  slides =       "http://www.hutter1.net/publ/scompcon.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  isbn =         "978-3-031-19906-6",
  doi =          "10.1007/978-3-031-19907-3_18",
  keywords =     "Reinforcement Learning; large action spaces; compression; coding; internal actions; sampling.",
  abstract =     "Reinforcement Learning formalises an embodied agent's
                  interaction with the environment through observations, rewards
                  and actions. But where do the actions come from? Actions are
                  often considered to represent something external, such as the
                  movement of a limb, a chess piece, or more generally, the output
                  of an actuator. In this work we explore and formalize a
                  contrasting view, namely that actions are best thought of as the
                  output of a sequence of internal choices with respect to an
                  action model. This view is particularly well-suited for
                  leveraging the recent advances in large sequence models as prior
                  knowledge for multi-task reinforcement learning problems. Our
                  main contribution in this work is to show how to augment the
                  standard MDP formalism with a sequential notion of internal
                  action using information-theoretic techniques, and that this
                  leads to self-consistent definitions of both internal and
                  external action value functions.",
  support =      "ARC grant DP150104590",
  for =          "461105(50%),461301(50%)",
}
@inproceedings{Hutter:22ncollapse2,
  author       = {Tomer Galanti and Andras Gyorgy and Marcus Hutter},
  title        = {Improved Generalization Bounds for Transfer Learning via Neural Collapse},
  booktitle    = {First Workshop on Pre-training: Perspectives, Pitfalls, and Paths Forward at ICML 2022},
  _number      = {DM:rh/P6844},
  pages        = {1--6},
  address      = {Baltimore, MD, USA},
  _month       = jul,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ncollapse2},
  url          = {http://openreview.net/forum?id=VrK7pKwOhT_},
  pdf          = {http://www.hutter1.net/publ/ncollapse2.pdf},
  latex        = {http://www.hutter1.net/publ/ncollapse2.tex},
  slides       = {http://www.hutter1.net/publ/sncollapse2.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  keywords     = {neural collapse; transfer learning; classification},
  abstract     = {Using representations learned by large, pretrained models,
                  also called foundation models, in new tasks with fewer data
                  has been successful in a wide range of machine learning problems.
                  Recently, Galanti et al. (2022) introduced a theoretical framework
                  for studying this transfer learning setting for classification.
                  Their analysis is based on the recently observed phenomenon
                  that the features learned by overparameterized deep classification networks
                  show an interesting clustering property, called neural collapse (Papyan et al. 2020).
                  A cornerstone of their analysis demonstrates that neural collapse
                  generalizes from the source classes to new target classes.
                  However, this analysis is limited as it relies on several unrealistic assumptions.
                  In this work, we provide an improved theoretical analysis
                  significantly relaxing these modeling assumptions.},
}
@techreport{Hutter:22exiid,
  author       = {Marcus Hutter},
  title        = {Testing Independence of Exchangeable Random Variables},
  institution  = {DeepMind},
  address      = {London, UK},
  _month       = oct,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#exiid},
  http         = {http://arxiv.org/abs/2210.12392},
  pdf          = {http://www.hutter1.net/publ/exiid.pdf},
  slides       = {http://www.hutter1.net/publ/sexiid.pdf},
  video        = {http://youtu.be/xb1ZYG4rP_Y},
  project      = {http://www.hutter1.net/official/projects.htm#stat},
  keywords     = {independent; identically distributed; exchangeable
                  random variables; statistical tests; unstructured data.},
  abstract     = {Given well-shuffled data, can we determine whether the data
                  items are statistically (in)dependent? Formally, we consider the
                  problem of testing whether a set of exchangeable random
                  variables are independent. We will show that this is possible
                  and develop tests that can confidently reject the null
                  hypothesis that data is independent and identically distributed
                  and have high power for (some) exchangeable distributions. We
                  will make no structural assumptions on the underlying sample
                  space. One potential application is in Deep Learning, where data
                  is often scraped from the whole internet, with duplications
                  abound, which can render data non-iid and test-set evaluation
                  prone to give wrong answers.},
}
@Article{Hutter:22clogames,
  author =       "Mikael Böörs and Tobias Wängberg and Tom Everitt and Marcus Hutter",
  title =        "Classification by Decomposition: A Novel Approach to Classification of Symmetric 2 x 2 Games",
  journal =      "Theory and Decision",
  volume =       "93",
  number =       "3",
  pages =        "463--508",
  publisher =    "Springer",
  _month =        oct,
  year =         "2022",
  bibtex =       "http://www.hutter1.net/official/bib.htm#clogames",
  url =          "http://link.springer.com/article/10.1007/s11238-021-09850-z",
  pdf =          "http://www.hutter1.net/publ/clogames.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#agents",
  issn =         "1573-7187",
  doi =          "10.1007/s11238-021-09850-z",
  keywords =     "Classification; Symmetric games; 2×2 Games; Decomposition; Cooperation and conflict; Simplicity",
  abstract =     "In this paper, we provide a detailed review of previous
                  classifications of 2×2 games and suggest a mathematically simple
                  way to classify the symmetric 2×2 games based on a decomposition
                  of the payoff matrix into a cooperative and a zero-sum part. We
                  argue that differences in the interaction between the parts is
                  what makes games interesting in different ways. Our claim is
                  supported by evolutionary computer experiments and findings in
                  previous literature. In addition, we provide a method for using
                  a stereographic projection to create a compact 2-d
                  representation of the game space.",
  for =          "460202(33%),380303(33%),460209(33%)",
}
@article{Hutter:22oimilearn,
  author       = {Michael K. Cohen and Marcus Hutter and Neel Nanda},
  title        = {Fully General Online Imitation Learning},
  journal      = {Journal of Machine Learning Research},
  volume       = {23},
  number       = {334},
  pages        = {1--30},
  publisher    = {Microtome},
  _month       = oct,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#oimilearn},
  http         = {http://www.jmlr.org/papers/v23/21-0618.html},
  url          = {http://arxiv.org/abs/2102.08686},
  pdf          = {http://www.hutter1.net/publ/oimilearn.pdf},
  slides       = {http://www.hutter1.net/publ/soimilearn.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#safe},
  keywords     = {Bayesian sequence prediction; imitation learning; active learning; general environments},
  abstract     = {In imitation learning, imitators and demonstrators are policies
                  for picking actions given past interactions with the
                  environment. If we run an imitator, we probably want events to
                  unfold similarly to the way they would have if the demonstrator
                  had been acting the whole time. No existing work provides formal
                  guidance in how this might be accomplished, instead restricting
                  focus to environments that restart, making learning unusually
                  easy, and conveniently limiting the significance of any mistake.
                  We address a fully general setting, in which the (stochastic)
                  environment and demonstrator never reset, not even for training
                  purposes. Our new conservative Bayesian imitation learner
                  underestimates the probabilities of each available action, and
                  queries for more data with the remaining probability. Our main
                  result: if an event would have been unlikely had the
                  demonstrator acted the whole time, that event's likelihood can
                  be bounded above when running the (initially totally ignorant)
                  imitator instead. Meanwhile, queries to the demonstrator rapidly
                  diminish in frequency.},
  support      = {ARC grant DP150104590},
}
@techreport{Hutter:22transalg,
  author       = {Mary Phuong and Marcus Hutter},
  title        = {Formal Algorithms for Transformers},
  institution  = {DeepMind},
  address      = {London, UK},
  _month       = jul,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#transalg},
  http         = {http://arxiv.org/abs/2207.09238},
  pdf          = {http://www.hutter1.net/publ/transalg.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  keywords     = {formal algorithms, pseudocode, transformers, attention, encoder,
                  decoder, BERT, GPT, Gopher, tokenization, training, inference.},
  abstract     = {This document aims to be a self-contained, mathematically
                  precise overview of transformer architectures and algorithms
                  (\emph{not} results). It covers what transformers are, how they
                  are trained, what they are used for, their key architectural
                  components, and a preview of the most prominent models. Complete
                  pseudocode is provided. The reader is assumed to be familiar
                  with basic ML terminology and simpler neural network
                  architectures such as MLPs.},
  note         = {LaTeX source available at http://arxiv.org/abs/2207.09238},
}
@inproceedings{Hutter:22ncollapse,
  author       = {Tomer Galanti and Andras Gyorgy and Marcus Hutter},
  title        = {On the Role of Neural Collapse in Transfer Learning},
  booktitle    = {Proc. 10th International Conference on Learning Representations ({ICLR'22})},
  address      = {Virtual, Earth},
  _month       = apr,
  year         = {2022},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ncollapse},
  http         = {http://openreview.net/forum?id=OOWsE-Mz-ro},
  url          = {http://arxiv.org/abs/2112.15121},
  pdf          = {http://www.hutter1.net/publ/ncollapse.pdf},
  slides       = {http://www.hutter1.net/publ/sncollapse.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  keywords     = {transfer learning; neural collapse; foundation models; few-shot learning},
  abstract     = {We study the ability of foundation models to learn
                  representations for classification that are transferable to new,
                  unseen classes. Recent results in the literature show that
                  representations learned by a single classifier over many classes
                  are competitive on few-shot learning problems with
                  representations learned by special-purpose algorithms designed
                  for such problems. In this paper we provide an explanation for
                  this behavior based on the recently observed phenomenon that the
                  features learned by overparameterized classification networks
                  show an interesting clustering property, called neural collapse.
                  We demonstrate both theoretically and empirically that neural
                  collapse generalizes to new samples from the training classes,
                  and -- more importantly -- to new classes as well, allowing
                  foundation models to provide feature maps that work well in
                  transfer learning and, specifically, in the few-shot setting.},
  znote        = {Acceptance rate: 1095/3391 = 32\%.
                  Trended SuperHot in deeplearn.org in Jan'2022},
}
@TechReport{Hutter:22invmdp,
  author =       "Marcus Hutter and Steven Hansen",
  title =        "Uniqueness and Complexity of Inverse MDP Models",
  institution =  "DeepMind",
  address =      "London",
  number =       "rh/P2466",
  _month =        feb,
  year =         "2022",
  bibtex =       "http://www.hutter1.net/official/bib.htm#invmdp",
  pdf =          "http://www.hutter1.net/publ/invmdp.pdf",
  slides =       "http://www.hutter1.net/publ/sinvmdp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#rl",
  keywords =     "inverse models; reinforcement learning; causality; theory; multi-step models; planning",
  abstract =     "What is the action sequence aa'a'' that was likely responsible for reaching state s''' (from state s) in 3 steps?
                  Addressing such questions is important in causal reasoning and in reinforcement learning.
                  Inverse ``MDP'' models p(aa'a''|ss''') can be used to answer them.
                  In the traditional ``forward'' view, transition ``matrix'' p(s'|sa) and policy π(a|s) uniquely determine ``everything'':
                  the whole dynamics p(as'a's''a''...|s), and with it, the action-conditional state process p(s's''...|saa'a''), 
                  the multi-step inverse models p(aa'a''...|ss^i), etc.
                  If the latter is our primary concern,  a natural question, analogous to the forward case
                  is to which extent 1-step inverse model p(a|ss') plus policy π(a|s) 
                  determine the multi-step inverse models or even the whole dynamics.
                  In other words, can forward models be inferred from inverse models or even be side-stepped.
                  This work addresses this question and variations thereof,
                  and also whether there are efficient decision/inference algorithms for this.",
}

%-------------Publications-of-Marcus-Hutter-2021--------------%

@techreport{Hutter:21isotuning,
  author       = {Laurent Orseau and Marcus Hutter},
  title        = {Isotuning with Applications to Scale-Free Online Learning},
  institution  = {DeepMind},
  address      = {London},
  number       = {http://arxiv.org/abs/2112.14586},
  pages        = {1--32},
  _month       = dec,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#isotuning},
  url          = {http://arxiv.org/abs/2112.14586},
  pdf          = {http://www.hutter1.net/publ/isotuning.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#mixed},
  keywords     = {online learning; convex optimization; regret analysis; adaptive learning rate;
                  scale-free; anytime; unbounded loss; unbounded domain},
  abstract     = {We extend and combine several tools of the literature to design
                  fast, adaptive, anytime and scale-free online learning
                  algorithms. Scale-free regret bounds must scale linearly with
                  the maximum loss, both toward large losses and toward very small
                  losses. Adaptive regret bounds demonstrate that an algorithm can
                  take advantage of easy data and potentially have constant
                  regret. We seek to develop fast algorithms that depend on as few
                  parameters as possible, in particular they should be anytime and
                  thus not depend on the time horizon. Our first and main tool,
                  isotuning, is a generalization of the idea of designing adaptive
                  learning rates that balance the trade-off of the regret. We
                  provide a simple and versatile theorem that can be applied to a
                  wide range of settings, and competes with the best balancing in
                  hindsight within a factor 2. The second tool is an online
                  correction, which allows us to obtain centered bounds for many
                  algorithms, to prevent the regret bounds from being vacuous when
                  the domain is overly large or only partially constrained. The
                  last tool, null updates, prevents the algorithm from performing
                  overly large updates, which could result in unbounded regret, or
                  even invalid updates. We develop a general theory to combine all
                  these tools and apply it to several standard algorithms. In
                  particular, we (almost entirely) restore the adaptivity to small
                  losses of FTRL for unbounded domains, design and prove
                  scale-free adaptive guarantees for a variant of Mirror Descent
                  (at least when the Bregman divergence is convex in its second
                  argument), extend Adapt-ML-Prod to scale-free guarantees, and
                  provide several additional contributions about Prod, AdaHedge,
                  BOA and Soft-Bayes.},
  for          = {490304(50%),461199(50%)},
}
@inproceedings{Hutter:21symintel,
  author       = {Samuel Allen Alexander and Marcus Hutter},
  title        = {Reward-Punishment Symmetric Universal Intelligence},
  booktitle    = {Proc. 14th Conf. on Artificial General Intelligence ({AGI'21})},
  address      = {San Francisco, USA},
  series       = {LNAI},
  volume       = {13154},
  pages        = {1--10},
  _editor      = {Ben Goertzel and Matthew Iklé and Alexey Potapov},
  publisher    = {Springer},
  _month       = oct,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#symintel},
  url          = {http://arxiv.org/abs/2110.02450},
  pdf          = {http://www.hutter1.net/publ/symintel.pdf},
  slides       = {http://www.hutter1.net/publ/ssymintel.pdf},
  video        = {http://youtu.be/CnsqHSCBgX0?t=30250},
  qanda        = {http://youtu.be/CnsqHSCBgX0?t=32165},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.1007/978-3-030-93758-4_1},
  issn         = {0302-9743},
  isbn         = {978-3-030-93757-7},
  keywords     = {Universal intelligence; Intelligence measures; Reinforcement learning.},
  abstract     = {Can an agent's intelligence level be negative? We extend the
                  Legg-Hutter agent-environment framework to include punishments
                  and argue for an affirmative answer to that question. We show
                  that if the background encodings and Universal Turing Machine
                  (UTM) admit certain Kolmogorov complexity symmetries, then the
                  resulting Legg-Hutter intelligence measure is symmetric about
                  the origin. In particular, this implies reward-ignoring agents
                  have Legg-Hutter intelligence 0 according to such UTMs.},
  for          = {461105(40%),460202(30%),500312(30%)},
}
@TechReport{Hutter:21causalseq,
  author =       "Pedro A. Ortega and Markus Kunesch and Grégoire Delétang and Tim Genewein and Jordi Grau-Moya and Joel Veness and Jonas Buchli and Jonas Degrave and Bilal Piot and Julien Perolat and Tom Everitt and Corentin Tallec and Emilio Parisotto and Tom Erez and Yutian Chen and Scott Reed and Marcus Hutter and Nando de Freitas and Shane Legg",
  title =        "Shaking the Foundations: Delusions in Sequence Models for Interaction and Control",
  institution =  "DeepMind",
  address =      "London",
  number =       "http://arxiv.org/abs/2110.10819",
  pages =        "1--16",
  _month =        oct,
  year =         "2021",
  bibtex =       "http://www.hutter1.net/official/bib.htm#causalseq",
  url =          "http://arxiv.org/abs/2110.10819",
  pdf =          "http://www.hutter1.net/publ/causalseq.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#mixed",
  keywords =     "sequence models; sequential prediction; reinforcement learning; causality; self-delusion",
  abstract =     "The recent phenomenal success of language models has
                  reinvigorated machine learning research, and large sequence
                  models such as transformers are being applied to a variety of
                  domains. One important problem class that has remained
                  relatively elusive however is purposeful adaptive behavior.
                  Currently there is a common perception that sequence models
                  ``lack the understanding of the cause and effect of their
                  actions'' leading them to draw incorrect inferences due to
                  auto-suggestive delusions. In this report we explain where this
                  mismatch originates, and show that it can be resolved by
                  treating actions as causal interventions. Finally, we show that
                  in supervised learning, one can teach a system to condition or
                  intervene on data by training with factual and counterfactual
                  error signals respectively.",
  for =          "460202(40%),461199(30%),461103(30%)",
}
@article{Hutter:21ai4hum,
  author       = {Reinhard Hutter and Marcus Hutter},
  title        = {Chances and Risks of Artificial Intelligence — A Concept of Developing and Exploiting Machine Intelligence for Future Societies},
  journal      = {Applied System Innovation},
  volume       = {4},
  number       = {2},
  pages        = {1--19},
  publisher    = {MDPI},
  _month       = jun,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ai4hum},
  http         = {https://www.mdpi.com/2571-5577/4/2/37},
  xurl         = {http://arxiv.org/abs/2106.none},
  pdf          = {http://www.hutter1.net/publ/ai4hum.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#safe},
  issn         = {2571-5577},
  doi          = {10.3390/asi4020037},
  keywords     = {artificial and human intelligence; security; risks and risk management;
                  quality of life; common welfare; socio-political assessment},
  abstract     = {Artificial Intelligence (AI): Boon or Bane for societies? AI
                  technologies and solutions—as most revolutionary technologies
                  have done in the past—offer negative implications on the one
                  hand and considerable positive potential on the other. Avoiding
                  the former and fostering the latter will require substantial
                  investments in future societal concepts, research and
                  development, and control of AI-based solutions in AI security
                  while avoiding abuse. Preparation for the future role of AI in
                  societies should strive towards the implementation of related
                  methods and tools for risk management, models of complementary
                  human–machine cooperation, strategies for the optimization of
                  production and administration, and innovative concepts for the
                  distribution of the economic value created. Two extreme possible
                  “end states” of AI impact (if there is ever an end state) that
                  are being discussed at present may manifest as (a) uncontrolled
                  substitution by AI of major aspects of production, services, and
                  administrative and decision-making processes, leading to
                  unprecedented risks such as high unemployment, and devaluation
                  and the underpayment of people in paid work, resulting in
                  inequality in the distribution of wealth and employment,
                  diminishing social peace, social cohesion, solidarity, security,
                  etc., or, on the contrary, (b) the freeing of people from
                  routine labor through increased automation in production,
                  administration and services, and changing the constitution of
                  politics and societies into constituencies with high ethical
                  standards, personal self-determination, and the general
                  dominance of humane principles, as opposed to pure materialism.
                  Any mix of these two extremes could develop, and these
                  combinations may vary among different societies and political
                  systems.},
  for          = {460299(25%),440710(25%),440711(25%),441004(25%)},
}
@article{Hutter:21alignx,
  author       = {Tom Everitt and Marcus Hutter and Ramana Kumar and Victoria Krakovna},
  title        = {Reward Tampering Problems and Solutions in Reinforcement Learning: A Causal Influence Diagram Perspective},
  journal      = {Synthese},
  xvolume      = {??},
  xnumber      = {??},
  xpages       = {??-??},
  publisher    = {Springer},
  _month       = may,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#alignx},
  url          = {http://arxiv.org/abs/1908.04734},
  pdf          = {http://www.hutter1.net/publ/alignx.pdf},
  slides       = {http://www.hutter1.net/publ/salign.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#safe},
  code         = {http://www.hutter1.net/publ/align.cpp},
  issn         = {0039-7857},
  doi          = {10.1007/s11229-021-03141-4},
  keywords     = {AI safety, reinforcement learning, Bayesian learning, causal graphs},
  abstract     = {Can humans get arbitrarily capable reinforcement learning (RL)
                  agents to do their bidding? Or will sufficiently capable RL
                  agents always find ways to bypass their intended objectives by
                  shortcutting their reward signal? This question impacts how far
                  RL can be scaled, and whether alternative paradigms must be
                  developed in order to build safe artificial general
                  intelligence. In this paper, we study when an RL agent has an
                  instrumental goal to tamper with its reward process, and
                  describe design principles that prevent instrumental goals for
                  two different types of reward tampering (reward function
                  tampering and RF-input tampering). Combined, the design
                  principles can prevent both types of reward tampering from being
                  instrumental goals. The analysis benefits from causal influence
                  diagrams to provide intuitive yet precise formalizations.},
  support      = {ARC grant DP150104590},
  for          = {461105(33%),460202(33%),500306(33%)},
}
@Article{Hutter:21ccamfrl,
  author =       "Thomas Mesnard and Théophane Weber and Fabio Viola and Shantanu Thakoor and Alaa Saade and Anna Harutyunyan and Will Dabney and Tom Stepleton and Nicolas Heess and Arthur Guez and Marcus Hutter and Lars Buesing and Rémi Munos",
  title =        "Counterfactual Credit Assignment in Model-Free Reinforcement Learning",
  journal =      "Journal of Machine Learning Research, W\&CP: ICML",
  volume =       "139",
  pages =        "7654--7664",
  _editor =       "Marina Meila and Tong Zhang",
  _month =        jul,
  year =         "2021",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ccamfrl",
  url =          "http://arxiv.org/abs/2011.09464",
  pdf =          "http://www.hutter1.net/publ/ccamfrl.pdf",
  slides =       "https://icml.cc/virtual/2021/poster/9795",
  project =      "http://www.hutter1.net/official/projects.htm#rl",
  keywords =     "Reinforcement Learning; Planning; Deep RL",
  abstract =     "Credit assignment in reinforcement learning is the problem of
                  measuring an action’s influence on future rewards. In
                  particular, this requires separating skill from luck, i.e.
                  disentangling the effect of an action on rewards from that of
                  external factors and subsequent actions. To achieve this, we
                  adapt the notion of counterfactuals from causality theory to a
                  model-free RL setup. The key idea is to condition value
                  functions on future events, by learning to extract relevant
                  information from a trajectory. We formulate a family of policy
                  gradient algorithms that use these future-conditional value
                  functions as baselines or critics, and show that they are
                  provably low variance. To avoid the potential bias from
                  conditioning on future information, we constrain the hindsight
                  information to not contain information about the agent's
                  actions. We demonstrate the efficacy and validity of our
                  algorithm on a number of illustrative and challenging problems.",
  for =          "461105(100%)",
  znote =        "Acceptance rate: 1184/5513   = 21\%",
}
@article{Hutter:21ckillcat,
  author       = {Michael K. Cohen and Marcus Hutter and Elliot Catt},
  title        = {Curiosity Killed or Incapacitated the Cat and the Asymptotically Optimal Agent},
  journal      = {IEEE Journal on Selected Areas in Information Theory},
  volume       = {2},
  number       = {2},
  pages        = {665--677},
  publisher    = {IEEE},
  _month       = may,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ckillcat},
  url          = {http://arxiv.org/abs/2006.03357},
  pdf          = {http://www.hutter1.net/publ/ckillcat.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#safe},
  issn         = {2641-8770},
  doi          = {10.1109/JSAIT.2021.3079722},
  keywords     = {Artificial intelligence; learning; autonomous agents;
    Bayes methods; information theory; inference algorithms;
    history; reinforcement learning; Markov processes},
  abstract     = {Reinforcement learners are agents that learn to pick actions
    that lead to high reward. Ideally, the value of a reinforcement
    learner’s policy approaches optimality—where the optimal
    informed policy is the one which maximizes reward.
    Unfortunately, we show that if an agent is guaranteed to be
    “asymptotically optimal” in any (stochastically computable)
    environment, then subject to an assumption about the true
    environment, this agent will be either “destroyed” or
    “incapacitated” with probability 1. Much work in reinforcement
    learning uses an ergodicity assumption to avoid this problem.
    Often, doing theoretical research under simplifying assumptions
    prepares us to provide practical solutions even in the absence
    of those assumptions, but the ergodicity assumption in
    reinforcement learning may have led us entirely astray in
    preparing safe and effective exploration strategies for agents
    in dangerous environments. Rather than assuming away the
    problem, we present an agent, Mentee, with the modest guarantee
    of approaching the performance of a mentor, doing safe
    exploration instead of reckless exploration. Critically,
    Mentee’s exploration probability depends on the expected
    information gain from exploring. In a simple non-ergodic
    environment with a weak mentor, we find Mentee outperforms
    existing asymptotically optimal agents and its mentor.},
  support      = {ARC grant DP150104590},
  for          = {460202(33%),460209(33%),461105(33%)},
}
@patent{Hutter:21glcbpatent,
  author       = {Eren Sezener and Joel Veness and Marcus Hutter and Jianan Wang and David Budden},
  title        = {Gated Linear Contextual Bandits},
  _month       = apr,
  year         = {2021},
  number       = {WO2021069574A1},
  appl_number  = {PCT/EP2020/078259},
  _another_number = {45288-0091 WO1},
  journal      = {DeepMind},
  url          = {https://worldwide.espacenet.com/patent/search?q=pn%3DWO2021069574A1},
  pdf          = {http://www.hutter1.net/publ/glcbpatent.pdf},
  type         = {patent},
  abstract     = {Methods, systems, and apparatus, including computer programs
    encoded on computer storage media, for selecting actions in
    response to each context in a sequence of context inputs. One of
    the methods includes maintaining data specifying a respective
    gated linear network corresponding to each of the plurality of
    actions; for each context in the sequence of contexts: for each
    action, processing the context using the gated linear network
    corresponding to the action to generate a predicted probability;
    for each action, generating an action score for the action from
    at least the predicted probability; and selecting the action to
    be performed in response to the context based on the action
    scores.},
  for          = {461104(100%)},
}
@Article{Hutter:21bomaix,
  author =       "Michael K. Cohen and Badri Vellambi and Marcus Hutter",
  title =        "Intelligence and Unambitiousness Using Algorithmic Information Theory",
  journal =      "IEEE Journal on Selected Areas in Information Theory",
  volume =       "2",
  number =       "2",
  pages =        "678--690",
  publisher =    "IEEE",
  _month =        apr,
  year =         "2021",
  bibtex =       "http://www.hutter1.net/official/bib.htm#bomaix",
  url =          "http://arxiv.org/abs/2105.06268",
  pdf =          "http://www.hutter1.net/publ/bomaix.pdf",
  slides =       "http://www.hutter1.net/publ/sbomai.pdf",
  poster =       "http://www.hutter1.net/publ/pbomai.pdf",
  press =        "http://medium.com/analytics-vidhya/paper-summary-asymptotically-unambitious-artificial-general-intelligence-cohen-et-al-a5d091d501db",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  issn =         "2641-8770",
  doi =          "10.1109/JSAIT.2021.3073844",
  keywords =     "information theory; task analysis; computational modeling; history;
                  schedules; Bayes methods; artificial general intelligence;
                  existential threat; alignment problem; power; instrumental goal;
                  reinforcement learning; inference algorithms; autonomous agents; learning",
  abstract =     "Algorithmic Information Theory has inspired intractable
                  constructions of general intelligence (AGI), and undiscovered
                  tractable approximations are likely feasible. Reinforcement
                  Learning (RL), the dominant paradigm by which an agent might
                  learn to solve arbitrary solvable problems, gives an agent a
                  dangerous incentive: to gain arbitrary “power” in order to
                  intervene in the provision of their own reward. We review the
                  arguments that generally intelligent
                  algorithmic-information-theoretic reinforcement learners such as
                  Hutter’s AIXI would seek arbitrary power, including over us.
                  Then, using an information-theoretic exploration schedule, and a
                  setup inspired by causal influence theory, we present a variant
                  of AIXI which learns to not seek arbitrary power; we call it
                  “unambitious”. We show that our agent learns to accrue reward at
                  least as well as a human mentor, while relying on that mentor
                  with diminishing probability. And given a formal assumption that
                  we probe empirically, we show that eventually, the agent’s
                  world-model incorporates the following true fact: intervening in
                  the “outside world” will have no effect on reward acquisition;
                  hence, it has no incentive to shape the outside world.",
  support =      "ARC grant DP150104590",
  for =          "460202(33%),460209(33%),461105(33%)",
}
@techreport{Hutter:21dgn,
  author       = {Eren Sezener and Agnieszka Grabska-Barwińska and Dimitar Kostadinov and Maxime Beau and Sanjukta Krishnagopal and David Budden and Marcus Hutter and Joel Veness and Matthew Botvinick and Claudia Clopath and Michael Häusser and Peter E. Latham},
  title        = {A Rapid and Efficient Learning Rule for Biological Neural Circuits},
  institution  = {DeepMind},
  address      = {London, UK},
  _month       = mar,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#dgn},
  http         = {https://www.biorxiv.org/content/10.1101/2021.03.10.434756},
  pdf          = {http://www.hutter1.net/publ/dgn.pdf},
  slides       = {http://www.hutter1.net/publ/sdgn.pdf},
  video        = {http://youtu.be/g1F0F8JVmGs},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  code         = {http://github.com/deepmind/deepmind-research/tree/master/gated_linear_networks/colabs},
  doi          = {10.1101/2021.03.10.434756},
  abstract     = {The dominant view in neuroscience is that changes in synaptic
    weights underlie learning. It is unclear, however, how the brain
    is able to determine which synapses should change, and by how
    much. This uncertainty stands in sharp contrast to deep
    learning, where changes in weights are explicitly engineered to
    optimize performance. However, the main tool for doing that,
    backpropagation, is not biologically plausible, and networks
    trained with this rule tend to forget old tasks when learning
    new ones. Here we introduce the Dendritic Gated Network (DGN), a
    variant of the Gated Linear Network [1, 2], which offers a
    biologically plausible alternative to backpropagation. DGNs
    combine dendritic “gating” (whereby interneurons target
    dendrites to shape neuronal response) with local learning rules
    to yield provably efficient performance. They are significantly
    more data efficient than conventional artificial networks and
    are highly resistant to forgetting, and we show that they
    perform well on a variety of tasks, in some cases better than
    backpropagation. The DGN bears similarities to the cerebellum,
    where there is evidence for shaping of Purkinje cell responses
    by interneurons. It also makes several experimental predictions,
    one of which we validate with in vivo cerebellar imaging of mice
    performing a motor task.},
  for          = {461104(40%),520202(30%),520203(30%)},
}
@TechReport{Hutter:21scaling,
  author =       "Marcus Hutter",
  title =        "Learning Curve Theory",
  institution =  "DeepMind",
  address =      "London",
  number =       "http://arxiv.org/abs/2102.04074",
  _month =        feb,
  year =         "2021",
  bibtex =       "http://www.hutter1.net/official/bib.htm#scaling",
  url =          "http://arxiv.org/abs/2102.04074",
  pdf =          "http://www.hutter1.net/publ/scaling.pdf",
  slides =       "http://www.hutter1.net/publ/sscaling.pdf",
  video1 =       "http://youtu.be/q5YhJ8QDkMQ",
  video2 =       "http://media.mis.mpg.de/mml/2021-03-04",
  project =      "http://www.hutter1.net/official/projects.htm#mixed",
  keywords =     "Power Law, Scaling, Learning Curve, Theory, Data Size, Error, Loss, Zipf",
  abstract =     "Recently a number of empirical ``universal'' scaling law papers
                  have been published, most notably by OpenAI. `Scaling laws'
                  refers to power-law decreases of training or test error w.r.t.\
                  more data, larger neural networks, and/or more compute. In this
                  work we focus on scaling w.r.t.\ data size $n$. Theoretical
                  understanding of this phenomenon is in its infancy, except in
                  finite-dimensional models for which error typically decreases
                  with $n^{-1/2}$ or $n^{-1}$, where $n$ is the sample size. We
                  develop and theoretically analyse the simplest possible (toy)
                  model that can exhibit $n^{-\beta}$ learning curves for arbitrary
                  power $\beta>0$, and determine to which extent power laws are
                  universal or depend on the data distribution or loss function:
                  Roughly, learning curves exhibit a power law with
                  $\beta=\frac{\alpha}{1+\alpha}$ for Zipf-distributed data with
                  exponent $1+\alpha$,
                  independent of the choice of loss. Furthermore, noise rapidly
                  deteriorates/improves in instantaneous/time-averaged learning
                  curves for increasing $n$, suggesting that model selection
                  should better be based on cumulative (AUC) or time-averaged error,
                  not final test error.",
  for =          "461199(70%),461103(30%)",
}
@InProceedings{Hutter:21binesa,
  author =       "Sultan Javed Majeed and Marcus Hutter",
  title =        "Exact Reduction of Huge Action Spaces in General Reinforcement Learning",
  booktitle =    "Proc. 35th {AAAI} Conference on Artificial Intelligence ({AAAI'21})",
  address =      "Virtual, Earth",
  volume =       "35",
  publisher =    "AAAI Press",
  _month =        feb,
  year =         "2021",
  bibtex =       "http://www.hutter1.net/official/bib.htm#binesa",
  url =          "http://arxiv.org/abs/2012.10200",
  pdf =          "http://www.hutter1.net/publ/binesa.pdf",
  slides =       "http://www.hutter1.net/publ/sbinesa.pdf",
  poster =       "http://www.hutter1.net/publ/pbinesa.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  keywords =     "reinforcement learning; extreme state aggregation; action binarization; non-Markov",
  abstract =     "The reinforcement learning (RL) framework formalizes the notion
                  of learning with interactions. Many real-world problems have
                  large state-spaces and/or action-spaces such as in Go, StarCraft,
                  protein folding, and robotics or are non-Markovian, which cause
                  significant challenges to RL algorithms. In this work we address
                  the large action-space problem by sequentializing actions, which
                  can reduce the action-space size significantly, even down to two
                  actions at the expense of an increased planning horizon. We
                  provide explicit and exact constructions and equivalence proofs
                  for all quantities of interest for arbitrary history-based
                  processes. In the case of MDPs, this could help RL algorithms
                  that bootstrap. In this work we show how action-binarization in
                  the non-MDP case can significantly improve Extreme State
                  Aggregation (ESA) bounds. ESA allows casting any (non-MDP,
                  non-ergodic, history-based) RL problem into a fixed-sized
                  non-Markovian state-space with the help of a surrogate Markovian
                  process. On the upside, ESA enjoys similar optimality guarantees
                  as Markovian models do. But a downside is that the size of the
                  aggregated state-space becomes exponential in the size of the
                  action-space. In this work, we patch this issue by binarizing
                  the action-space. We provide an upper bound on the number of
                  states of this binarized ESA that is logarithmic in the original
                  action-space size, a double-exponential improvement.",
  support =      "ARC grant DP150104590",
  for =          "461105(100%)",
  znote =        "Acceptance rate: 1692/7911=21\%",
}
@InProceedings{Hutter:21shortgln,
  author =       "Joel Veness and Tor Lattimore and David Budden and Avishkar Bhoopchand and Christopher Mattern and Agnieszka Grabska-Barwinska and Eren Sezener and Jianan Wang and Peter Toth and Simon Schmitt and Marcus Hutter",
  title =        "Gated Linear Networks",
  booktitle =    "Proc. 35th {AAAI} Conference on Artificial Intelligence ({AAAI'21})",
  address =      "Virtual, Earth",
  volume =       "35",
  publisher =    "AAAI Press",
  _month =        feb,
  year =         "2021",
  bibtex =       "http://www.hutter1.net/official/bib.htm#shortgln",
  url =          "http://arxiv.org/abs/1910.01526",
  pdf =          "http://www.hutter1.net/publ/shortgln.pdf",
  slides =       "http://www.hutter1.net/publ/sshortgln.pdf",
  poster =       "http://www.hutter1.net/publ/pshortgln.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#nn",
  press =        "http://www.reddit.com/r/MachineLearning/comments/hx0q69/r_deepminds_gated_linear_networks_paper_and_code/",
  code =         "http://github.com/aiwabdn/pygln",
  keywords =     "gating; linear; network; geometric mixing; capacity; backpropagation-free;
                  online convex optimization; resilience to catastrophic forgetting;
                  density estimation; empirical evaluation; UCI; MNIST",
  abstract =     "This paper presents a new family of backpropagation-free neural
                  architectures, Gated Linear Networks (GLNs). What distinguishes
                  GLNs from contemporary neural networks is the distributed and
                  local nature of their credit assignment mechanism; each neuron
                  directly predicts the target, forgoing the ability to learn
                  feature representations in favor of rapid online learning.
                  Individual neurons can model nonlinear functions via the use of
                  data-dependent gating in conjunction with online convex
                  optimization. We show that this architecture gives rise to
                  universal learning capabilities in the limit, with effective
                  model capacity increasing as a function of network size in a
                  manner comparable with deep ReLU networks. Furthermore, we
                  demonstrate that the GLN learning mechanism possesses
                  extraordinary resilience to catastrophic forgetting, performing
                  comparably to a MLP with dropout and Elastic Weight
                  Consolidation on standard benchmarks. These desirable
                  theoretical and empirical properties position GLNs as a
                  complementary technique to contemporary offline deep learning
                  methods.",
  for =          "461104(100%)",
  znote =        "Acceptance rate: 1692/7911=21\%",
}
@inproceedings{Hutter:21outman,
  author       = {Len Du and Marcus Hutter},
  title        = {How Useful are Hand-crafted Data? Making Cases for Anomaly Detection Methods},
  booktitle    = {54th Hawaii International Conference on System Sciences ({HICSS'21})},
  address      = {Maui, Hawaii, USA},
  volume       = {54},
  pages        = {847--856},
  publisher    = {ScholarSpace},
  _month       = jan,
  year         = {2021},
  bibtex       = {http://www.hutter1.net/official/bib.htm#outman},
  http         = {http://hdl.handle.net/10125/70716},
  pdf          = {http://www.hutter1.net/publ/outman.pdf},
  slides       = {http://www.hutter1.net/publ/soutman.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#mixed},
  isbn         = {978-0-9981331-4-0},
  keywords     = {accountability; evaluation; obscurity of AI algorithms;
    anomaly detection; evaluation; explainability; small data; testing AI},
  abstract     = {While the importance of small data has been admitted in
    principle, they have not been widely adopted as a necessity in
    current machine learning or data mining research. Most
    predominantly, machine learning methods were typically evaluated
    under a “bigger is better” presumption. The more (and the more
    complex) data we could pour at a method, the better we thought
    we were at estimating its performance. We deem this mindset
    detrimental to interpretability, explainability, and the
    sustained development of the field. For example, despite that
    new outlier detection methods were often inspired by small, low
    dimensional samples, their performance has been exclusively
    evaluated by large, high-dimensional datasets resembling
    real-world use cases. With these “big data” we miss the chance
    to gain insights from close looks at how exactly the algorithms
    perform, as we mere humans cannot really comprehend the samples.
    In this work, we explore in the exactly opposite direction. We
    run several classical anomaly detection methods against small,
    mindfully crafted cases on which the results can be examined in
    detail. In addition to better understanding of these classical
    algorithms, our exploration has actually led to the discovery of
    some novel uses of classical anomaly detection methods to our
    surprise.},
  for          = {460502(100%)},
  note         = {Nominated for best paper award: http://hicss.hawaii.edu/best-papers/},
  znote        = {Acceptance rate: 710/1449=49\%},
}

%-------------Publications-of-Marcus-Hutter-2020--------------%

@inproceedings{Hutter:20nnprune,
  author       = {Laurent Orseau and Marcus Hutter and Omar Rivasplata},
  title        = {Logarithmic Pruning is All You Need},
  booktitle    = {Advances in Neural Information Processing Systems ({NeurIPS'20})},
  volume       = {33},
  pages        = {2925--2934},
  _editor      = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
  publisher    = {Curran Associates},
  address      = {Cambridge, MA, USA},
  _month       = dec,
  year         = {2020},
  bibtex       = {http://www.hutter1.net/official/bib.htm#nnprune},
  http         = {http://papers.nips.cc/paper/2020/hash/1e9491470749d5b0e361ce4f0b24d037-Abstract.html},
  url          = {http://arxiv.org/abs/2006.12156},
  pdf          = {http://www.hutter1.net/publ/nnprune.pdf},
  poster       = {http://www.hutter1.net/publ/pnnprune.pdf},
  video        = {http://nips.cc/virtual/2020/public/poster_1e9491470749d5b0e361ce4f0b24d037.html},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  keywords     = {subnetwork; lottery ticket hypothesis; pruning; mixture of weights},
  abstract     = {The Lottery Ticket Hypothesis is a conjecture that every large
    neural network contains a subnetwork that, when trained in
    isolation, achieves comparable performance to the large network.
    An even stronger conjecture has been proven recently: Every
    sufficiently overparameterized network contains a subnetwork
    that, even without training, achieves comparable accuracy to the
    trained large network. This theorem, however, relies on a number
    of strong assumptions and provides a loose polynomial factor on
    the size of the large network compared to the target function.
    In this work, we remove the most limiting assumptions of this
    previous work while providing significantly tighter bounds: the
    overparameterized network only needs to be a logarithmic factor
    in the accuracy larger than the target subnetwork.},
  for          = {461104(100%)},
  znote        = {Acceptance rate: 1900/9454=20\%. Spotlight: 280/9454=3\%},
}
% NOTE(review): this key contains a second colon ("Hutter:20:nctlfmn"), unlike
% the "Hutter:YYname" keys of the neighbouring entries. It is kept unchanged
% because documents citing it depend on the exact key -- confirm before renaming.
@inproceedings{Hutter:20:nctlfmn,
  author       = {Jianan Wang and Eren Sezener and David Budden and Marcus Hutter and Joel Veness},
  title        = {A Combinatorial Perspective on Transfer Learning},
  booktitle    = {Advances in Neural Information Processing Systems ({NeurIPS'20})},
  volume       = {33},
  pages        = {918--929},
  _editor      = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
  publisher    = {Curran Associates},
  address      = {Cambridge, MA, USA},
  _month       = dec,
  year         = {2020},
  bibtex       = {http://www.hutter1.net/official/bib.htm#nctlfmn},
  http         = {http://papers.nips.cc/paper/2020/hash/0a3b6f64f0523984e51323fe53b8c504-Abstract.html},
  url          = {http://arxiv.org/abs/2010.12268},
  pdf          = {http://www.hutter1.net/publ/nctlfmn.pdf},
  poster       = {http://www.hutter1.net/publ/pnctlfmn.pdf},
  video        = {http://nips.cc/virtual/2020/public/poster_0a3b6f64f0523984e51323fe53b8c504.html},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  code         = {http://github.com/aiwabdn/pygln},
  keywords     = {gating; linear; network; transfer learning; online convex optimization;
    geometric mixing; resilience to catastrophic forgetting;
    forget me not process; empirical evaluation; MNIST},
  abstract     = {Human intelligence is characterized not only by the capacity to
    learn complex skills, but the ability to rapidly adapt and
    acquire new skills within an ever-changing environment. In this
    work we study how the learning of modular solutions can allow
    for effective generalization to both unseen and potentially
    differently distributed data. Our main postulate is that the
    combination of task segmentation, modular learning and
    memory-based ensembling can give rise to generalization on an
    exponentially growing number of unseen tasks. We provide a
    concrete instantiation of this idea using a combination of: (1)
    the Forget-Me-Not Process, for task segmentation and memory
    based ensembling; and (2) Gated Linear Networks, which in
    contrast to contemporary deep learning techniques use a modular
    and local learning mechanism. We demonstrate that this system
    exhibits a number of desirable continual learning properties:
    robustness to catastrophic forgetting, no negative transfer and
    increasing levels of positive transfer as more tasks are seen.
    We show competitive performance against both offline and online
    methods on standard continual learning benchmarks.},
  for          = {461104(100%)},
  znote        = {Acceptance rate: 1900/9454=20\%},
}
@InProceedings{Hutter:20banditgln,
  author =       "Eren Sezener and Marcus Hutter and David Budden and Jianan Wang and Joel Veness",
  title =        "Online Learning in Contextual Bandits using Gated Linear Networks",
  booktitle =    "Advances in Neural Information Processing Systems ({NeurIPS'20})",
  volume =       "33",
  pages =        "19467--19477",
  _editor =       "H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin",
  publisher =    "Curran Associates",
  address =      "Cambridge, MA, USA",
  _month =        dec,
  year =         "2020",
  bibtex =       "http://www.hutter1.net/official/bib.htm#banditgln",
  http =         "http://papers.nips.cc/paper/2020/hash/e287f0b2e730059c55d97fa92649f4f2-Abstract.html",
  url =          "http://arxiv.org/abs/2002.11611",
  pdf =          "http://www.hutter1.net/publ/banditgln.pdf",
  poster =       "http://www.hutter1.net/publ/pbanditgln.pdf",
  video =        "http://nips.cc/virtual/2020/public/poster_e287f0b2e730059c55d97fa92649f4f2.html",
  project =      "http://www.hutter1.net/official/projects.htm#nn",
  code =         "http://github.com/aiwabdn/pygln",
  keywords =     "gating; linear; network; contextual bandits; online convex optimization; geometric mixing",
  abstract =     "We introduce a new and completely online contextual bandit
                  algorithm called Gated Linear Contextual Bandits (GLCB). This
                  algorithm is based on Gated Linear Networks (GLNs), a recently
                  introduced deep learning architecture with properties
                  well-suited to the online setting. Leveraging data-dependent
                  gating properties of the GLN we are able to estimate prediction
                  uncertainty with effectively zero algorithmic overhead. We
                  empirically evaluate GLCB compared to 9 state-of-the-art
                  algorithms that leverage deep neural networks, on a standard
                  benchmark suite of discrete and continuous contextual bandit
                  problems. GLCB obtains mean first-place despite being the only
                  online method, and we further support these results with a
                  theoretical study of its convergence properties.",
  for =          "461104(100%)",
  znote =        "Acceptance rate: 1900/9454=20\%",
}
@Article{Hutter:20gpt3agi,
  author =       "Marcus Hutter",
  title =        "GPT-3 and AGI",
  publisher =    "Trusted Autonomous Systems",
  _month =        aug,
  year =         "2020",
  bibtex =       "http://www.hutter1.net/official/bib.htm#gpt3agi",
  http =         "http://www.eventbrite.com.au/e/a-discussion-on-gpt-3-and-artificial-general-intelligence-tickets-116673544713",
  slides =       "http://www.hutter1.net/publ/sgpt3agi.pdf",
  video =        "http://youtu.be/E25Uk8WpYQE",
  project =      "http://www.hutter1.net/official/projects.htm#agi",
  discussion =   "http://youtu.be/aDFLp4A1EmY",
  keywords =     "Deep Learning; Generative Pre-Trained Transformer; Inner Working;
                  Artificial General Intelligence; Achievements; Limitations; Philosophy; Outlook",
  abstract =     "GPT-3 stands for Generative Pre-trained Transformer 3.
                  It is a gargantuan artificial Neural Network (NN) around the size of a
                  mouse brain, trained on essentially the whole internet and millions of
                  books. GPT-3 has demonstrated impressive performance on a wide
                  range of language tasks. Most discussions focus on GPT-3’s
                  performance. In this talk I will give a glimpse of how GPT-3 actually
                  works, and ask and tentatively answer the question of whether it is a
                  step towards creating Artificial General Intelligence (AGI). The talk has
                  been given as a primer in a panel discussion on this topic.",
  for =          "461103(33%),460208(33%),460202(33%)",
}
@InProceedings{Hutter:20aixipess,
  author =       "Michael Cohen and Marcus Hutter",
  title =        "Pessimism About Unknown Unknowns Inspires Conservatism",
  booktitle =    "33rd Conference on Learning Theory ({COLT'20})",
  address =      "Virtual / Graz, Austria",
  volume =       "125",
  series =       "Proceedings of Machine Learning Research",
  pages =        "1344--1373",
  _editor =       "Jacob Abernethy and Shivani Agarwal",
  publisher =    "PMLR",
  _month =        jul,
  year =         "2020",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixipess",
  http =         "http://proceedings.mlr.press/v125/cohen20a.html",
  url =          "http://arxiv.org/abs/2006.08753",
  pdf =          "http://www.hutter1.net/publ/aixipess.pdf",
  slides =       "http://www.hutter1.net/publ/saixipess.pdf",
  video =        "http://www.colt2020.org/virtual/papers/paper_221.html",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  issn =         "2640-3498",
  keywords =     "",
  abstract =     "If we could define the set of all bad outcomes, we could
                  hard-code an agent which avoids them; however, in sufficiently
                  complex environments, this is infeasible. We do not know of any
                  general-purpose approaches in the literature to avoiding novel
                  failure modes. Motivated by this, we define an idealized
                  Bayesian reinforcement learner which follows a policy that
                  maximizes the worst-case expected reward over a set of
                  world-models. We call this agent pessimistic, since it optimizes
                  assuming the worst case. A scalar parameter tunes the agent's
                  pessimism by changing the size of the set of world-models taken
                  into account. Our first main contribution is: given an
                  assumption about the agent's model class, a sufficiently
                  pessimistic agent does not cause ``unprecedented events'' with
                  probability $1-\delta$, whether or not designers know how to
                  precisely specify those precedents they are concerned with.
                  Since pessimism discourages exploration, at each timestep, the
                  agent may defer to a mentor, who may be a human or some
                  known-safe policy we would like to improve. Our other main
                  contribution is that the agent's policy's value approaches at
                  least that of the mentor, while the probability of deferring to
                  the mentor goes to 0. In high-stakes environments, we might like
                  advanced artificial agents to pursue goals cautiously, which is
                  a non-trivial problem even if the agent were allowed arbitrary
                  computing power; we present a formal solution.",
  support =      "ARC grant DP150104590",
  for =          "460202(33%),460209(33%),461105(33%)",
  znote =        "Acceptance rate: 119/388 = 31\%",
}
@techreport{Hutter:20asymnn,
  author       = {Marcus Hutter},
  title        = {On Representing (Anti)Symmetric Functions},
  institution  = {DeepMind},
  address      = {London, UK},
  number       = {arXiv:2007.15298},
  _month       = jun,
  year         = {2020},
  bibtex       = {http://www.hutter1.net/official/bib.htm#asymnn},
  url          = {http://arxiv.org/abs/2007.15298},
  pdf          = {http://www.hutter1.net/publ/asymnn.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#nn},
  keywords     = {Neural network, approximation, universality, Slater determinant, Vandermonde
                  matrix, equivariance, symmetry, anti-symmetry, symmetric polynomials, polarized
                  basis, multilayer perceptron, continuity, smoothness},
  abstract     = {Permutation-invariant, -equivariant, and -covariant functions
                  and anti-symmetric functions are important in quantum physics,
                  computer vision, and other disciplines. Applications often
                  require most or all of the following properties: (a) a large
                  class of such functions can be approximated, e.g. all continuous
                  function, (b) only the (anti)symmetric functions can be
                  represented, (c) a fast algorithm for computing the
                  approximation, (d) the representation itself is continuous or
                  differentiable, (e) the architecture is suitable for learning
                  the function from data. (Anti)symmetric neural networks have
                  recently been developed and applied with great success. A few
                  theoretical approximation results have been proven, but many
                  questions are still open, especially for particles in more than
                  one dimension and the anti-symmetric case, which this work
                  focusses on. More concretely, we derive natural polynomial
                  approximations in the symmetric case, and approximations based
                  on a single generalized Slater determinant in the anti-symmetric
                  case. Unlike some previous super-exponential and discontinuous
                  approximations, these seem a more promising basis for future
                  tighter bounds. We provide a complete and explicit universality
                  proof of the Equivariant MultiLayer Perceptron, which implies
                  universality of symmetric MLPs and the FermiNet.},
  for          = {461104(50%),510899(50%)},
}
@TechReport{Hutter:20qcsol,
  author =       "Elliot Catt and Marcus Hutter",
  title =        "A Gentle Introduction to Quantum Computing Algorithms with Applications to Universal Prediction",
  institution =  "Australian National University",
  address =      "Canberra, Australia",
  number =       "arXiv:2005.03137",
  _month =        may,
  year =         "2020",
  bibtex =       "http://www.hutter1.net/official/bib.htm#qcsol",
  url =          "http://arxiv.org/abs/2005.03137",
  pdf =          "http://www.hutter1.net/publ/qcsol.pdf",
  slides =       "http://www.hutter1.net/publ/sqcsol.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  abstract =     "In this technical report we give an elementary introduction to
                  Quantum Computing for non-physicists. In this introduction we
                  describe in detail some of the foundational Quantum Algorithms
                  including: the Deutsch-Jozsa Algorithm, Shor's Algorithm, Grover
                  Search, and Quantum Counting Algorithm and briefly the
                  Harrow-Lloyd Algorithm. Additionally we give an introduction to
                  Solomonoff Induction, a theoretically optimal method for
                  prediction. We then attempt to use Quantum computing to find
                  better algorithms for the approximation of Solomonoff Induction.
                  This is done by using techniques from other Quantum computing
                  algorithms to achieve a speedup in computing the speed prior,
                  which is an approximation of Solomonoff's prior, a key part of
                  Solomonoff Induction. The major limiting factors are that the
                  probabilities being computed are often so small that without a
                  sufficient (often large) amount of trials, the error may be
                  larger than the result. If a substantial speedup in the
                  computation of an approximation of Solomonoff Induction can be
                  achieved through quantum computing, then this can be applied to
                  the field of intelligent agents as a key part of an
                  approximation of the agent AIXI.",
  for =          "461307(40%),461105(30%),460299(30%)",
}
@InProceedings{Hutter:20bomai,
  author =       "Michael Cohen and Badri Vellambi and Marcus Hutter",
  title =        "Asymptotically Unambitious Artificial General Intelligence",
  booktitle =    "Proc. 34th {AAAI} Conference on Artificial Intelligence ({AAAI'20})",
  address =      "New York, USA",
  _editor =      "F. Rossi and V. Conitzer and F. Sha",
  volume =       "34",
  number =       "3",
  pages =        "2467--2476",
  publisher =    "AAAI Press",
  _month =        feb,
  year =         "2020",
  bibtex =       "http://www.hutter1.net/official/bib.htm#bomai",
  url =          "http://arxiv.org/abs/1905.12186",
  pdf =          "http://www.hutter1.net/publ/bomai.pdf",
  slides =       "http://www.hutter1.net/publ/sbomai.pdf",
  poster =       "http://www.hutter1.net/publ/pbomai.pdf",
  press =        "http://medium.com/analytics-vidhya/paper-summary-asymptotically-unambitious-artificial-general-intelligence-cohen-et-al-a5d091d501db",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  issn =         "2159-5399",
  isbn =         "978-1-57735-835-0",
  doi =          "10.1609/aaai.v34i03.5628",
  keywords =     "artificial general intelligence; history; schedules; Bayes methods;
                  existential threat; alignment problem; power; instrumental goal; reinforcement learning.",
  abstract =     "General intelligence, the ability to solve arbitrary solvable
                  problems, is supposed by many to be artificially constructible.
                  Narrow intelligence, the ability to solve a given particularly
                  difficult problem, has seen impressive recent development.
                  Notable examples include self-driving cars, Go engines, image
                  classifiers, and translators. Artificial General Intelligence
                  (AGI) presents dangers that narrow intelligence does not: if
                  something smarter than us across every domain were indifferent
                  to our concerns, it would be an existential threat to
                  humanity, just as we threaten many species despite no ill will.
                  Even the theory of how to maintain the alignment of an AGI's
                  goals with our own has proven highly elusive. We present the
                  first algorithm we are aware of for asymptotically unambitious
                  AGI, where ``unambitiousness'' includes not seeking arbitrary
                  power. Thus, we identify an exception to the Instrumental
                  Convergence Thesis, which is roughly that by default, an AGI
                  would seek power, including over us.",
  support =      "ARC grant DP150104590",
  for =          "460202(33%),460209(33%),461105(33%)",
  znote =        "Acceptance rate: 1591/7737=21\%",
}

%-------------Publications-of-Marcus-Hutter-2019--------------%

@Article{Hutter:19aligns,
  author =       "Tom Everitt and Ramana Kumar and Marcus Hutter",
  title =        "Designing Agent Incentives to Avoid Reward Tampering",
  journal =      "Medium",
  volume =       "8",
  number =       "14",
  _month =        aug,
  year =         "2019",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aligns",
  url =          "http://medium.com/@deepmindsafetyresearch/designing-agent-incentives-to-avoid-reward-tampering-4380c1bb6cd",
  pdf =          "http://www.hutter1.net/publ/aligns.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  keywords =     "AI safety, reinforcement learning, Bayesian learning, causal graphs",
  abstract =     "From an AI safety perspective, having a clear design principle
                  and a crisp characterization of what problem it solves means
                  that we don't have to guess which agents are safe. In this post
                  and paper we describe how a design principle called current-RF
                  optimization avoids the reward function tampering problem.",
  for =          "080101(60%),220312(20%),080198(20%)",
  seo =          "970108(80%),970117(20%)",
}
@InProceedings{Hutter:19rlwlinfa,
  author =       "Marcus Hutter and Samuel Yang-Zhao and Sultan Javed Majeed",
  title =        "Conditions on Features for Temporal Difference-Like Methods to Converge",
  booktitle =    "Proc. 28th International Joint Conf. on Artificial Intelligence ({IJCAI'19})",
  address =      "Macao, China",
  _editor =       "Sarit Kraus",
  _publisher =    "IJCAI",
  pages =        "2570--2577",
  _month =        aug,
  year =         "2019",
  bibtex =       "http://www.hutter1.net/official/bib.htm#rlwlinfa",
  url =          "http://arxiv.org/abs/1905.11702",
  pdf =          "http://www.hutter1.net/publ/rlwlinfa.pdf",
  slides =       "http://www.hutter1.net/publ/srlwlinfa.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  isbn =         "978-0-9992411-4-1",
  doi =          "10.24963/ijcai.2019/357",
  keywords =     "reinforcement learning; temporal difference learning; Bellman equation;
                  unique solution; linear function approximation; convergence;
                  negative result; wrong solution; natural algorithm",
  abstract =     "The convergence of many reinforcement learning (RL) algorithms
                  with linear function approximation has been investigated
                  extensively but most proofs assume that these methods converge
                  to a unique solution. In this paper, we provide a complete
                  characterization of non-uniqueness issues for a large class of
                  reinforcement learning algorithms, simultaneously unifying many
                  counter-examples to convergence in a theoretical framework. We
                  achieve this by proving a new condition on features that can
                  determine whether the convergence assumptions are valid or
                  non-uniqueness holds. We consider a general class of RL methods,
                  which we call natural algorithms, whose solutions are
                  characterized as the fixed point of a projected Bellman
                  equation. Our main result proves that natural algorithms
                  converge to the correct solution if and only if all the value
                  functions in the approximation space satisfy a certain shape.
                  This implies that natural algorithms are, in general, inherently
                  prone to converge to the wrong solution for most feature choices
                  even if the value function can be represented exactly. Given our
                  results, we show that state aggregation-based features are a
                  safe choice for natural algorithms and also provide a condition
                  for finding convergent algorithms under other feature
                  constructions.",
  support =      "ARC grant DP150104590",
  for =          "080101(60%),010404(40%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 850/4752=18\%",
}
@InProceedings{Hutter:19ksasao,
  author =       "Michael Cohen and Elliot Catt and Marcus Hutter",
  title =        "A Strongly Asymptotically Optimal Agent in General Environments",
  booktitle =    "Proc. 28th International Joint Conf. on Artificial Intelligence ({IJCAI'19})",
  address =      "Macao, China",
  _editor =       "Sarit Kraus",
  _publisher =    "IJCAI",
  pages =        "2179--2186",
  _month =        aug,
  year =         "2019",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ksasao",
  url =          "http://arxiv.org/abs/1903.01021",
  pdf =          "http://www.hutter1.net/publ/ksasao.pdf",
  slides =       "http://www.hutter1.net/publ/sksasao.pdf",
  poster =       "http://www.hutter1.net/publ/pksasao.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  isbn =         "978-0-9992411-4-1",
  doi =          "10.24963/ijcai.2019/302",
  keywords =     "reinforcement learning; model-based reasoning;
                  sequential decision making; probabilistic inference; AIXI",
  abstract =     "Reinforcement Learning agents are expected to eventually perform
                  well. Typically, this takes the form of a guarantee about the
                  asymptotic behavior of an algorithm given some assumptions about
                  the environment. We present an algorithm for a policy whose
                  value approaches the optimal value with probability 1 in all
                  computable probabilistic environments, provided the agent has a
                  bounded horizon. This is known as strong asymptotic optimality,
                  and it was previously unknown whether it was possible for a
                  policy to be strongly asymptotically optimal in the class of all
                  computable probabilistic environments. Our agent, Inquisitive
                  Reinforcement Learner (Inq), is more likely to explore the more
                  it expects an exploratory action to reduce its uncertainty about
                  which environment it is in, hence the term inquisitive.
                  Exploring inquisitively is a strategy that can be applied
                  generally; for more manageable environment classes,
                  inquisitiveness is tractable. We conducted experiments in
                  ``grid-worlds'' to compare the Inquisitive Reinforcement Learner
                  to other weakly asymptotically optimal agents.",
  support =      "ARC grant DP150104590",
  for =          "080101(60%),010404(40%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 850/4752=18\%",
}
@techreport{Hutter:19fair,
  author       = {Marcus Hutter},
  title        = {Fairness without Regret},
  institution  = {DeepMind \& ANU},
  _month       = jul,
  year         = {2019},
  bibtex       = {http://www.hutter1.net/official/bib.htm#fair},
  url          = {http://arxiv.org/abs/1907.05159},
  pdf          = {http://www.hutter1.net/publ/fair.pdf},
  latex        = {http://www.hutter1.net/publ/fair.tex},
  slides       = {http://www.hutter1.net/publ/sfair.pdf},
  video        = {https://hmi.anu.edu.au/events-2/2022/4/11/hmi-dais-18-fairness-without-regret},
  project      = {http://www.hutter1.net/official/projects.htm#mixed},
  keywords     = {utility; objective; optimal; fair/equitable/just; cost/regret; uncertainty.},
  abstract     = {A popular approach of achieving fairness in optimization problems
                  is by constraining the solution space to ``fair'' solutions,
                  which unfortunately typically reduces solution quality.
                  In practice, the ultimate goal is often an aggregate of sub-goals
                  without a unique or best way of combining them or which is
                  otherwise only partially known. I turn this problem into a feature
                  and suggest to use a parametrized objective and vary the parameters
                  within reasonable ranges to get a {\em set} of optimal solutions,
                  which can then be optimized using secondary criteria such as
                  fairness without compromising the primary objective,
                  i.e.\ without regret (societal cost).},
  for          = {220104(70%),010303(30%)},
  seo          = {940401(70%),970108(30%)},
}
@InProceedings{Hutter:19actagg,
  author =       "Sultan Javed Majeed and Marcus Hutter",
  title =        "Performance Guarantees for Homomorphisms beyond {M}arkov Decision Processes",
  booktitle =    "Proc. 33rd {AAAI} Conference on Artificial Intelligence ({AAAI'19})",
  address =      "Honolulu, USA",
  volume =       "33",
  pages =        "7659--7666",
  publisher =    "AAAI Press",
  _month =        jan,
  year =         "2019",
  bibtex =       "http://www.hutter1.net/official/bib.htm#actagg",
  url =          "http://arxiv.org/abs/1811.03895",
  pdf =          "http://www.hutter1.net/publ/actagg.pdf",
  poster =       "http://www.hutter1.net/publ/sactagg.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  issn =         "2159-5399",
  isbn =         "978-1-57735-809-1",
  doi =          "10.1609/aaai.v33i01.33017659",
  keywords =     "homomorphism; state aggregation; non-MDP; action-value aggregation;
                  reinforcement learning.",
  abstract =     "Most real-world problems have huge state and/or action spaces.
                  Therefore, a naive application of existing tabular solution
                  methods is not tractable on such problems. Nonetheless, these
                  solution methods are quite useful if an agent has access to a
                  relatively small state-action space homomorphism of the true
                  environment and near-optimal performance is guaranteed by the
                  map. A plethora of research is focused on the case when the
                  homomorphism is a Markovian representation of the underlying
                  process. However, we show that near-optimal performance is
                  sometimes guaranteed even if the homomorphism is non-Markovian.",
  support =      "ARC grant DP150104590",
  for =          "080101(50%),080198(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 1150/7095=16\%",
}

%-------------Publications-of-Marcus-Hutter-2018--------------%

@InProceedings{Hutter:18agisafe,
  author =       "Tom Everitt and Gary Lea and Marcus Hutter",
  title =        "{AGI} Safety Literature Review",
  booktitle =    "Proc. 27th International Joint Conf. on Artificial Intelligence ({IJCAI'18})",
  address =      "Stockholm, Sweden",
  _editor =       "Jérôme Lang",
  _publisher =    "IJCAI",
  pages =        "5441--5449",
  _month =        jul,
  year =         "2018",
  bibtex =       "http://www.hutter1.net/official/bib.htm#agisafe",
  url =          "http://arxiv.org/abs/1805.01109",
  pdf =          "http://www.hutter1.net/publ/agisafe.pdf",
  slides =       "http://www.hutter1.net/publ/sagisafe.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  isbn =         "978-0-9992411-2-7",
  doi =          "10.24963/ijcai.2018/768",
  keywords =     "reinforcement learning; philosophical and ethical issues;
                  artificial general intelligence; AGI safety; public policy;
                  survey; future AGI.",
  abstract =     "The development of Artificial General Intelligence (AGI) promises
                  to be a major event. Along with its many potential benefits, it
                  also raises serious safety concerns. The intention of this paper is
                  to provide an easily accessible and up-to-date collection of
                  references for the emerging field of AGI safety. A significant
                  number of safety problems for AGI have been identified. We list
                  these, and survey recent research on solving them. We also cover
                  works on how best to think of AGI from the limited knowledge we
                  have today, predictions for when AGI will first be created, and
                  what will happen after its creation. Finally, we review the current
                  public policy on AGI.",
  note =         "IJCAI Review Track",
  support =      "ARC grant DP150104590",
  for =          "080101(60%),220312(20%),080198(20%)",
  seo =          "970108(80%),970117(20%)",
  znote =        "Acceptance rate: 15/43=35\%",
}
@InProceedings{Hutter:18qnonmdp,
  author =       "Sultan Javed Majeed and Marcus Hutter",
  title =        "On {Q}-learning Convergence for Non-{M}arkov Decision Processes",
  booktitle =    "Proc. 27th International Joint Conf. on Artificial Intelligence ({IJCAI'18})",
  address =      "Stockholm, Sweden",
  _editor =       "Jérôme Lang",
  _publisher =    "IJCAI",
  pages =        "2546--2552",
  _month =        jul,
  year =         "2018",
  bibtex =       "http://www.hutter1.net/official/bib.htm#qnonmdp",
  xurl =         "http://arxiv.org/abs/1807.none",
  pdf =          "http://www.hutter1.net/publ/qnonmdp.pdf",
  slides =       "http://www.hutter1.net/publ/sqnonmdp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  isbn =         "978-0-9992411-2-7",
  doi =          "10.24963/ijcai.2018/353",
  keywords =     "reinforcement learning; TD-learning; Q-learning; non-MDP;
                  non-ergodic; convergence; abstractions; state-uniformity.",
  abstract =     "Temporal-difference (TD) learning is an attractive, computationally
                  efficient framework for model-free reinforcement learning.
                  Q-learning is one of the most widely used TD learning technique
                  that enables an agent to learn the optimal action-value function,
                  i.e. Q-value function. Contrary to its widespread use, Q-learning
                  has only been proven to converge on Markov Decision Processes
                  (MDPs) and Q-uniform abstractions of finite-state MDPs. On the
                  other hand, most real-world problems are inherently non-Markovian:
                  the full true state of the environment is not revealed by recent
                  observations. In this paper, we investigate the behavior of
                  Q-learning when applied to non-MDP and non-ergodic domains which
                  may have infinitely many underlying states. We prove that the
                  convergence guarantee of Q-learning can be extended to a class of
                  such non-MDP problems, in particular, to some non-stationary
                  domains. We show that state-uniformity of the optimal Q-value
                  function is a necessary and sufficient condition for Q-learning to
                  converge even in the case of infinitely many internal states.",
  for =          "080101(50%),080198(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 710/3470=21\%",
}
@article{Hutter:18off2onx,
  author       = {Marcus Hutter},
  title        = {Tractability of Batch to Sequential Conversion},
  journal      = {Theoretical Computer Science},
  volume       = {733},
  pages        = {71--82},
  publisher    = {Elsevier},
  _month       = jul,
  year         = {2018},
  bibtex       = {http://www.hutter1.net/official/bib.htm#off2onx},
  url          = {http://arxiv.org/abs/1407.3334},
  pdf          = {http://www.hutter1.net/publ/off2onx.pdf},
  latex        = {http://www.hutter1.net/publ/off2onx.tex},
  slides       = {http://www.hutter1.net/publ/soff2on.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#infoth},
  issn         = {0304-3975},
  doi          = {10.1016/j.tcs.2018.04.037},
  keywords     = {offline; online; batch; sequential; probability; estimation;
                  prediction; time-consistency; normalization; tractable; regret;
                  combinatorics; Bayes; Laplace; Ristad; Good-Turing.},
  abstract     = {We consider the problem of converting batch estimators into a
                  sequential predictor or estimator with small extra regret. Formally
                  this is the problem of merging a collection of probability
                  measures over strings of length 1,2,3,... into a single
                  probability measure over infinite sequences. We describe various
                  approaches and their pros and cons on various examples. As a
                  side-result we give an elementary non-heuristic purely
                  combinatoric derivation of Turing's famous estimator. Our main
                  technical contribution is to determine the computational
                  complexity of sequential estimators with good guarantees in general.
                  We conclude with an open problem on how to derive tractable
                  sequential from batch estimators with good guarantees in general.},
  for          = {080401(30%),080201(30%),010405(40%)},
  seo          = {970108(100%)},
}
@inproceedings{Hutter:18convbinctw,
  author       = {Badri N. Vellambi and Marcus Hutter},
  title        = {Convergence of Binarized Context-tree Weighting for Estimating Distributions of Stationary Sources},
  booktitle    = {Proc. {IEEE} International Symposium on Information Theory ({ISIT'18})},
  address      = {Vail, USA},
  pages        = {731--735},
  _editor      = {R. L. Urbanke and M. K. Varanasi},
  publisher    = {IEEE},
  _month       = jun,
  year         = {2018},
  bibtex       = {http://www.hutter1.net/official/bib.htm#convbinctw},
  pdf          = {http://www.hutter1.net/publ/convbinctw.pdf},
  slides       = {http://www.hutter1.net/publ/sconvbinctw.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#compress},
  issn         = {2157-8117},
  isbn         = {978-1-5386-4780-6},
  doi          = {10.1109/ISIT.2018.8437737},
  keywords     = {Context-tree weighting; KT estimator; frequency estimator; binarization;
                  stationary distribution; tree source; stationary ergodic source;
                  convergence rate; worst-case bounds.},
  abstract     = {This work investigates the convergence rate of learning the
                  stationary distribution of finite-alphabet stationary ergodic
                  sources using a binarized context-tree weighting approach. The
                  binarized context-tree weighting (CTW) algorithm estimates the
                  stationary distribution of a symbol as a product of conditional
                  distributions of each component bit, which are determined in a
                  sequential manner using the well known binary context-tree
                  weighting method. We establish that CTW algorithm is a consistent
                  estimator of the stationary distribution, and that the worst-case
                  $L_1$-prediction error between the CTW and frequency estimates
                  using $n$ source symbols each of which when binarized consists of
                  $k>1$ bits decays as $\Theta(\sqrt{2^k\log(n)/n})$.},
  support      = {ARC grants DP120100950 and DP150104590},
  for          = {080401(100%)},
  seo          = {970108(80%),890205(20%)},
}
@TechReport{Hutter:18align,
  author =       "Tom Everitt and Marcus Hutter",
  title =        "The Alignment Problem for History-Based {B}ayesian Reinforcement Learners",
  pages =        "70",
  _month =        jun,
  year =         "2018",
  bibtex =       "http://www.hutter1.net/official/bib.htm#align",
  http =         "http://www.tomeveritt.se/papers/alignment.pdf",
  pdf =          "http://www.hutter1.net/publ/align.pdf",
  slides =       "http://www.hutter1.net/publ/salign.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  keywords =     "AI safety, reinforcement learning, Bayesian learning, causal graphs",
  abstract =     "Value alignment is often considered a critical component of safe
                  artificial intelligence. Meanwhile, reinforcement learning is often
                  criticized as being inherently unsafe and misaligned, for reasons
                  such as wireheading, delusionboxes, misspecified reward functions
                  and distributional shifts. In this paper, we categorize sources of
                  misalignment for reinforcement learning agents, illustrating each
                  type with numerous examples. For each type of problem, we also
                  describe ways to remove the source of misalignment. Combined, the
                  suggestions form high-level blueprints for how to design value
                  aligned RL agents.",
  support =      "ARC grant DP150104590",
  for =          "080101(60%),220312(20%),080198(20%)",
  seo =          "970108(80%),970117(20%)",
  note =         "First winner of the AI alignment prize round 2:
                  http://www.lesswrong.com/posts/SSEyiHaACSYDHcYZz/announcement-ai-alignment-prize-round-2-winners-and-next",
}
@article{Hutter:18aixicplexx,
  author     = {Jan Leike and Marcus Hutter},
  title      = {On the Computability of {S}olomonoff Induction and {AIXI}},
  journal    = {Theoretical Computer Science},
  volume     = {716},
  pages      = {28--49},
  publisher  = {Elsevier},
  _month     = mar,
  year       = {2018},
  bibtex     = {http://www.hutter1.net/official/bib.htm#aixicplexx},
  pdf        = {http://www.hutter1.net/publ/aixicplexx.pdf},
  slides     = {http://www.hutter1.net/publ/saixicplex.pdf},
  project    = {http://www.hutter1.net/official/projects.htm#uai},
  issn       = {0304-3975},
  doi        = {10.1016/j.tcs.2017.11.020},
  keywords   = {Solomonoff induction; AIXI; General reinforcement learning;
    Knowledge-seeking agents; Computability; Arithmetical hierarchy.},
  abstract   = {How could we solve the machine learning and the artificial
    intelligence problem if we had infinite computation? Solomonoff
    induction and the reinforcement learning agent AIXI are proposed
    answers to this question. Both are known to be incomputable. We
    quantify this using the arithmetical hierarchy, and prove upper and
    in most cases corresponding lower bounds for incomputability.
    Moreover, we show that AIXI is not limit computable, thus it cannot
    be approximated using finite computation. However there are limit
    computable epsilon-optimal approximations to AIXI. We also derive
    computability bounds for knowledge-seeking agents, and give a limit
    computable weakly asymptotically optimal reinforcement learning
    agent.},
  support    = {ARC grant DP150104590},
  for        = {080101(50%),080201(50%)},
  seo        = {970108(100%)},
}
@InProceedings{Hutter:18piidkkt,
  author =       "Badri N. Vellambi and Owen Cameron and Marcus Hutter",
  title =        "Universal Compression of Piecewise i.i.d. Sources",
  booktitle =    "Proc. Data Compression Conference ({DCC'18})",
  pages =        "267--276",
  _editor =       "Ali Bilgin and Michael W. Marcellin and Joan Serra{-}Sagrist{\`{a}} and James A. Storer",
  publisher =    "IEEE Computer Society",
  address =      "Snowbird, Utah, USA",
  _address =      "Alamitos, CA (publisher)",
  _month =        mar,
  year =         "2018",
  bibtex =       "http://www.hutter1.net/official/bib.htm#piidkkt",
  pdf =          "http://www.hutter1.net/publ/piidkkt.pdf",
  slides =       "http://www.hutter1.net/publ/spiidkkt.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#compress",
  doi =          "10.1109/DCC.2018.00035",
  issn =         "2375-0359",
  isbn =         "978-1-5386-4884-1",
  keywords =     "switching data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
  abstract =     "We study the problem of compressing piecewise i.i.d. sources, which
                  models the practical application of jointly compressing multiple
                  disparate data files. We establish that universal compression of
                  piecewise i.i.d. data is possible by modeling the data as a Markov
                  process whose memory grows logarithmically in the size of the data
                  using the Krichevsky-Trofimov (KT) estimator. The memory order is
                  chosen large enough so that the successful gleaning of the
                  distribution of the different pieces of the data from the
                  corresponding contexts is possible, and simultaneously small enough
                  that this learning can occur for almost any realization of any
                  piecewise data process.",
  support =      "ARC grants DP120100950 and DP150104590",
  for =          "080401(100%)",
  seo =          "970108(80%),890205(20%)",
}
@incollection{Hutter:18uaitas,
  author     = {Tom Everitt and Marcus Hutter},
  title      = {Universal Artificial Intelligence: Practical Agents and Fundamental Challenges},
  booktitle  = {Foundations of Trusted Autonomy},
  _series    = {Studies in Systems, Decision and Control 117},
  chapter    = {2},
  pages      = {15--46},
  editor     = {Hussein A. Abbass and Jason Scholz and Darryn J. Reid},
  publisher  = {Springer},
  _month     = jan,
  year       = {2018},
  bibtex     = {http://www.hutter1.net/official/bib.htm#uaitas},
  xurl       = {http://arxiv.org/abs/1801.none},
  pdf        = {http://www.hutter1.net/publ/uaitas.pdf},
  slides     = {http://www.hutter1.net/publ/suaitas.pdf},
  project    = {http://www.hutter1.net/official/projects.htm#uai},
  issn       = {2198-4182},
  isbn       = {978-3-319-64815-6},
  doi        = {10.1007/978-3-319-64816-3_2},
  keywords   = {foundations; general reinforcement learning; AI safety;
    Solomonoff induction; intelligent agents.},
  abstract   = {Foundational theories have contributed greatly to scientific
    progress in many fields. Examples include Zermelo-Fraenkel set
    theory in mathematics, and universal Turing machines in computer
    science. Universal Artificial Intelligence (UAI) is an increasingly
    well-studied foundational theory for artificial intelligence, based
    on ancient principles in the philosophy of science and modern
    developments in information and probability theory. Importantly, it
    refrains from making unrealistic Markov, ergodicity, or
    stationarity assumptions on the environment. UAI provides a
    theoretically optimal agent AIXI and principled ideas for
    constructing practical autonomous agents. The theory also makes it
    possible to establish formal results on the motivations of AI
    systems. Such results may greatly enhance the trustability of
    autonomous agents, and guide design choices towards more robust
    agent architectures and incentive schemes. Finally, UAI offers a
    deeper appreciation of fundamental problems such as the induction
    problem and the exploration-exploitation dilemma.},
  support    = {ARC grant DP150104590},
  for        = {080101(80%),220312(20%)},
  seo        = {970108(80%),970117(20%)},
  znote      = {68500+ downloads in 2018. Top 10 most downloaded Springer books in 2018 across all Engineering:
    http://www.springer.com/gp/campaigns/highlights-2018/engineering-2018},
}

%-------------Publications-of-Marcus-Hutter-2017--------------%

@InCollection{Hutter:17unilearn,
  author =       "Marcus Hutter",
  title =        "Universal Learning Theory",
  booktitle =    "Encyclopedia of Machine Learning and Data Mining",
  pages =        "1295--1304",
  editor =       "C. Sammut and G. Webb",
  publisher =    "Springer",
  _month =        aug,
  year =         "2017",
  edition =      "Second",
  bibtex =       "http://www.hutter1.net/official/bib.htm#unilearn",
  url =          "http://arxiv.org/abs/1102.2467",
  pdf =          "http://www.hutter1.net/publ/unilearn.pdf",
  latex =        "http://www.hutter1.net/publ/unilearn.tex",
  slides =       "http://www.hutter1.net/ai/susp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  doi =          "10.1007/978-1-4899-7687-1_867",
  isbn =         "978-1-4899-7686-4",
  keywords =     "Algorithmic probability; Ray Solomonoff; induction;
                  prediction; decision; action; Turing machine;
                  Kolmogorov complexity; universal prior; Bayes' rule.",
  abstract =     "This encyclopedic article gives a mini-introduction into the
                  theory of universal learning, founded by Ray Solomonoff in the
                  1960s and significantly developed and extended in the last
                  decade. It explains the spirit of universal learning, but
                  necessarily glosses over technical subtleties.",
  for =          "080401(30%),010405(30%),080198(40%)",
  seo =          "970108(100%)",
}
@inproceedings{Hutter:17thompgrls,
  author     = {Jan Leike and Tor Lattimore and Laurent Orseau and Marcus Hutter},
  title      = {On {T}hompson Sampling and Asymptotic Optimality},
  booktitle  = {Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})},
  address    = {Melbourne, Australia},
  _editor    = {Carles Sierra},
  _publisher = {IJCAI},
  pages      = {4889--4893},
  _month     = aug,
  year       = {2017},
  bibtex     = {http://www.hutter1.net/official/bib.htm#thompgrls},
  url        = {http://arxiv.org/abs/1602.07905},
  pdf        = {http://www.hutter1.net/publ/thompgrls.pdf},
  slides     = {http://www.hutter1.net/publ/sthompgrl.pdf},
  project    = {http://www.hutter1.net/official/projects.htm#uai},
  isbn       = {978-0-9992411-0-3},
  doi        = {10.24963/ijcai.2017/688},
  keywords   = {General reinforcement learning; Thompson sampling;
    asymptotic optimality; regret; discounting; recoverability; AIXI},
  abstract   = {We discuss some recent results on Thompson sampling
    for nonparametric reinforcement learning in
    countable classes of general stochastic environments.
    These environments can be non-Markovian,
    non-ergodic, and partially observable. We show
    that Thompson sampling learns the environment
    class in the sense that (1) asymptotically its value
    converges in mean to the optimal value and (2)
    given a recoverability assumption regret is sublinear.
    We conclude with a discussion about optimality
    in reinforcement learning.},
  support    = {ARC grant DP150104590},
  note       = {Best sister conferences paper track},
  for        = {080101(60%),010404(40%)},
  seo        = {970108(100%)},
}
@inproceedings{Hutter:17corruptrl,
  author     = {Tom Everitt and Victoria Krakovna and Laurent Orseau and Marcus Hutter and Shane Legg},
  title      = {Reinforcement Learning with a Corrupted Reward Channel},
  booktitle  = {Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})},
  address    = {Melbourne, Australia},
  _editor    = {Carles Sierra},
  _publisher = {IJCAI},
  pages      = {4705--4713},
  _month     = aug,
  year       = {2017},
  bibtex     = {http://www.hutter1.net/official/bib.htm#corruptrl},
  url        = {http://arxiv.org/abs/1705.08417},
  pdf        = {http://www.hutter1.net/publ/corruptrl.pdf},
  slides     = {http://www.hutter1.net/publ/scorruptrl.pdf},
  project    = {http://www.hutter1.net/official/projects.htm#safe},
  isbn       = {978-0-9992411-0-3},
  doi        = {10.24963/ijcai.2017/656},
  keywords   = {decoupled reinforcement learning; reward corruption;
    quantilisation; robustness; value learning.},
  abstract   = {No real-world reward function is perfect. Sensory errors and
    software bugs may result in agents getting higher (or lower)
    rewards than they should. For example, a reinforcement learning
    agent may prefer states where a sensory error gives it the maximum
    reward, but where the true reward is actually small. We formalise
    this problem as a generalised Markov Decision Problem called
    Corrupt Reward MDP. Traditional RL methods fare poorly in CRMDPs,
    even under strong simplifying assumptions and when trying to
    compensate for the possibly corrupt rewards. Two ways around the
    problem are investigated. First, by giving the agent richer data,
    such as in inverse reinforcement learning and semi-supervised
    reinforcement learning, reward corruption stemming from systematic
    sensory errors may sometimes be completely managed. Second, by
    using randomisation to blunt the agent's optimisation, reward
    corruption can be partially managed under some assumptions.},
  support    = {ARC grant DP150104590},
  for        = {080101(80%),220312(20%)},
  seo        = {970108(80%),970117(20%)},
  znote      = {Acceptance rate: 660/2540=26\%},
}
@InProceedings{Hutter:17cbefsrl,
  author =       "Jarryd Martin and Suraj Narayanan Sasikumar and Tom Everitt and Marcus Hutter",
  title =        "Count-Based Exploration in Feature Space for Reinforcement Learning",
  booktitle =    "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
  address =      "Melbourne, Australia",
  _editor =       "Carles Sierra",
  _publisher =    "IJCAI",
  pages =        "2471--2478",
  _month =        aug,
  year =         "2017",
  bibtex =       "http://www.hutter1.net/official/bib.htm#cbefsrl",
  url =          "http://arxiv.org/abs/1706.08090",
  pdf =          "http://www.hutter1.net/publ/cbefsrl.pdf",
  slides =       "http://www.hutter1.net/publ/scbefsrl.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  isbn =         "978-0-9992411-0-3",
  doi =          "10.24963/ijcai.2017/344",
  keywords =     "reinforcement learning; Markov decision process; planning under uncertainty;
                  sequential decision making; count-based exploration.",
  abstract =     "We introduce a new count-based optimistic exploration algorithm for
                  reinforcement learning (RL) that is feasible in environments with
                  high-dimensional state-action spaces. The success of RL algorithms
                  in these domains depends crucially on generalisation from limited
                  training experience. Function approximation techniques enable RL
                  agents to generalise in order to estimate the value of unvisited
                  states, but at present few methods enable generalisation regarding
                  uncertainty. This has prevented the combination of scalable RL
                  algorithms with efficient exploration strategies that drive the
                  agent to reduce its uncertainty. We present a new method for
                  computing a generalised state visit-count, which allows the agent
                  to estimate the uncertainty associated with any state. Our
                  phi-pseudo-count achieves generalisation by exploiting the same
                  feature representation of the state space that is used for value
                  function approximation. States that have less frequently observed
                  features are deemed more uncertain. The phi-Exploration-Bonus
                  algorithm rewards the agent for exploring in feature space rather
                  than in the untransformed state space. The method is simpler and
                  less computationally expensive than some previous proposals, and
                  achieves near state-of-the-art results on high-dimensional RL
                  benchmarks.",
  support =      "ARC grant DP150104590",
  for =          "080199(50%),080101(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 660/2540=26\%
                  Also presented at SURL'17 http://www.surl.tirl.info/",
}
@InProceedings{Hutter:17urlsurexp,
  author =       "John Aslanides and Jan Leike and Marcus Hutter",
  title =        "Universal Reinforcement Learning Algorithms: Survey and Experiments",
  booktitle =    "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
  address =      "Melbourne, Australia",
  _editor =       "Carles Sierra",
  _publisher =    "IJCAI",
  pages =        "1403--1410",
  _month =        aug,
  year =         "2017",
  bibtex =       "http://www.hutter1.net/official/bib.htm#urlsurexp",
  url =          "http://arxiv.org/abs/1705.10557",
  pdf =          "http://www.hutter1.net/publ/urlsurexp.pdf",
  slides =       "http://www.hutter1.net/publ/surlsurexp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  demo =         "http://www.hutter1.net/aixijs/",
  code =         "http://github.com/aslanides/aixijs",
  isbn =         "978-0-9992411-0-3",
  doi =          "10.24963/ijcai.2017/194",
  keywords =     "universal reinforcement learning; multi-agent system;
                  sequential decision making; survey; online demo; JavaScript code.",
  abstract =     "Many state-of-the-art reinforcement learning (RL) algorithms
                  typically assume that the environment is an ergodic Markov Decision
                  Process (MDP). In contrast, the field of universal reinforcement
                  learning (URL) is concerned with algorithms that make as few
                  assumptions as possible about the environment. The universal
                  Bayesian agent AIXI and a family of related URL algorithms have
                  been developed in this setting. While numerous theoretical
                  optimality results have been proven for these agents, there has
                  been no empirical investigation of their behavior to date. We
                  present a short and accessible survey of these URL algorithms under
                  a unified notation and framework, along with results of some
                  experiments that qualitatively illustrate some properties of the
                  resulting policies, and their relative performance on
                  partially-observable grid-world environments. We also present an
                  open-source reference implementation of the algorithms which we
                  hope will facilitate further understanding of, and experimentation
                  with, these ideas.",
  support =      "ARC grant DP150104590",
  for =          "080199(40%),080101(40%),010404(10%),010405(10%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 660/2540=26\%",
}
@InProceedings{Hutter:17offswitch,
  author =       "Tobias W{\"a}ngberg and Mikael B{\"o}{\"o}rs and Elliot Catt and Tom Everitt and Marcus Hutter",
  title =        "A Game-Theoretic Analysis of The Off-Switch Game",
  booktitle =    "Proc. 10th Conf. on Artificial General Intelligence ({AGI'17})",
  address =      "Melbourne, Australia",
  series =       "LNAI",
  volume =       "10414",
  pages =        "167--177",
  _editor =       "Tom Everitt and Ben Goertzel and Alexey Potapov",
  publisher =    "Springer",
  _month =        aug,
  year =         "2017",
  bibtex =       "http://www.hutter1.net/official/bib.htm#offswitch",
  url =          "http://arxiv.org/abs/1708.03871",
  pdf =          "http://www.hutter1.net/publ/offswitch.pdf",
  slides =       "http://www.hutter1.net/publ/soffswitch.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  doi =          "10.1007/978-3-319-63703-7_16",
  issn =         "0302-9743",
  isbn =         "978-3-319-63702-0",
  keywords =     "AI safety; corrigibility; intelligent agents; game theory; uncertainty.",
  abstract =     "The off-switch game is a game theoretic model of a highly
                  intelligent robot interacting with a human. In the original paper
                  by Hadfield-Menell et al. (2016b), the analysis is not fully
                  game-theoretic as the human is modelled as an irrational player,
                  and the robot's best action is only calculated under unrealistic
                  normality and soft-max assumptions. In this paper, we make the
                  analysis fully game theoretic, by modelling the human as a rational
                  player with a random utility function. As a consequence, we are
                  able to easily calculate the robot's best action for arbitrary
                  belief and irrationality assumptions.",
  for =          "080101(80%),220312(20%)",
  seo =          "970108(80%),970117(20%)",
  znote =        "Also presented at PT-AI 2017.
                  http://www.pt-ai.org/2017/papers
                  Acceptance rate: 28/77 = 36\% (oral presentation) [51/77=66\% incl. posters].",
}
@InProceedings{Hutter:17expdisc,
  author =       "Sean Lamont and John Aslanides and Jan Leike and Marcus Hutter",
  title =        "Generalised Discount Functions applied to a {Monte-Carlo} {AI$\mu$} Implementation",
  booktitle =    "Proc. 16th Conf. on Autonomous Agents and MultiAgent Systems ({AAMAS'17})",
  pages =        "1589--1591",
  _editor =       "Sanmay Das and Ed Durfee and Kate Larson and Michael Winikoff",
  _publisher =    "International Foundation for Autonomous Agents and Multiagent Systems",
  address =      "S{\~a}o Paulo, Brazil",
  _month =        may,
  year =         "2017",
  bibtex =       "http://www.hutter1.net/official/bib.htm#expdisc",
  http =         "http://dl.acm.org/citation.cfm?id=3091372",
  url =          "http://arxiv.org/abs/1703.01358",
  pdf =          "http://www.hutter1.net/publ/expdisc.pdf",
  latex =        "http://www.hutter1.net/publ/expdisc.tex",
  slides =       "http://www.hutter1.net/publ/sexpdisc.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  demo =         "http://www.hutter1.net/aixijs/",
  code =         "http://github.com/aslanides/aixijs",
  keywords =     "Monte Carlo; discount function; reinforcement learning; time consistency",
  abstract =     "In recent years, work has been done to develop the theory of
                  General Reinforcement Learning (GRL). However, there are no
                  examples demonstrating the known results regarding generalised
                  discounting. We have added to the GRL simulation platform (AIXIjs)
                  the functionality to assign an agent arbitrary discount functions,
                  and an environment which can be used to determine the effect of
                  discounting on an agent's policy. Using this, we investigate how
                  geometric, hyperbolic and power discounting affect an informed
                  agent in a simple MDP. We experimentally reproduce a number of
                  theoretical results, and discuss some related subtleties. It was
                  found that the agent's behaviour followed what is expected
                  theoretically, assuming appropriate parameters were chosen for the
                  Monte-Carlo Tree Search (MCTS) planning algorithm.",
  support =      "ARC grant DP150104590",
  for =          "080199(40%),080101(40%),010404(10%),010405(10%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 276/567 = 49\%",
}

%-------------Publications-of-Marcus-Hutter-2016--------------%

@Article{Hutter:16exsaggx,
  author =       "Marcus Hutter",
  title =        "Extreme State Aggregation beyond {Markov} Decision Processes",
  journal =      "Theoretical Computer Science",
  volume =       "650",
  pages =        "73--91",
  publisher =    "Elsevier",
  _month =        oct,
  year =         "2016",
  bibtex =       "http://www.hutter1.net/official/bib.htm#exsaggx",
  url =          "http://arxiv.org/abs/1407.3341",
  pdf =          "http://www.hutter1.net/publ/exsaggx.pdf",
  latex =        "http://www.hutter1.net/publ/exsaggx.tex",
  slides =       "http://www.hutter1.net/publ/sexsagg.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  issn =         "0304-3975",
  doi =          "10.1016/j.tcs.2016.07.032",
  keywords =     "State aggregation; Reinforcement learning; Non-MDP",
  abstract =     "We consider a Reinforcement Learning setup where an agent interacts
                  with an environment in observation--reward--action cycles without any
                  (esp. MDP) assumptions on the environment. State aggregation and
                  more generally feature reinforcement learning is concerned with
                  mapping histories/raw-states to reduced/aggregated states. The idea
                  behind both is that the resulting reduced process (approximately)
                  forms a small stationary finite-state MDP, which can then be
                  efficiently solved or learnt. We considerably generalize existing
                  aggregation results by showing that even if the reduced process is
                  not an MDP, the (q-)value functions and (optimal) policies of an
                  associated MDP with same state-space size solve the original
                  problem, as long as the solution can approximately be represented
                  as a function of the reduced states. This implies an upper bound on
                  the required state space size that holds uniformly for all RL
                  problems. It may also explain why RL algorithms designed for MDPs
                  sometimes perform well beyond MDPs.",
  support =      "ARC grant DP120100950",
  for =          "080101(100%)",
  seo =          "970108(100%)",
}
@InProceedings{Hutter:16aixideath,
  author =       "Jarryd Martin and Tom Everitt and Marcus Hutter",
  title =        "Death and Suicide in Universal Artificial Intelligence",
  booktitle =    "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
  address =      "New York, USA",
  series =       "LNAI",
  volume =       "9782",
  pages =        "23--32",
  _editor =       "Bas Steunebrink and Pei Wang and Ben Goertzel",
  publisher =    "Springer",
  _month =        jul,
  year =         "2016",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixideath",
  url =          "http://arxiv.org/abs/1606.00652",
  pdf =          "http://www.hutter1.net/publ/aixideath.pdf",
  latex =        "http://www.hutter1.net/publ/aixideath.tex",
  slides =       "http://www.hutter1.net/publ/saixideath.pdf",
  video =        "http://youtu.be/c__OjDHqFs",
  project =      "http://www.hutter1.net/official/projects.htm#safe",
  doi =          "10.1007/978-3-319-41649-6_3",
  issn =         "0302-9743",
  isbn =         "978-3-319-41648-9",
  keywords =     "intelligent agents; death; suicide; AIXI; reinforcement learning; semimeasure",
  abstract =     "Reinforcement learning (RL) is a general paradigm for studying
                  intelligent behaviour, with applications ranging from artificial
                  intelligence to psychology and economics. AIXI is a universal
                  solution to the RL problem; it can learn any computable environment.
                  A technical subtlety of AIXI is that it is defined using a mixture over
                  semimeasures that need not sum to 1, rather than
                  over proper probability measures. In this work we argue that
                  the shortfall of a semimeasure can naturally be interpreted as
                  the agent's estimate of the probability of its death. We formally define
                  death for generally intelligent agents like AIXI, and prove a number
                  of related theorems about their behaviour. Notable discoveries
                  include that agent behaviour can change radically under positive linear
                  transformations of the reward signal (from suicidal to
                  dogmatically self-preserving), and that the agent's posterior belief
                  that it will survive increases over time.",
  support =      "ARC grant DP150104590",
  for =          "080101(80%),220312(20%)",
  seo =          "970108(80%),970122(10%),970117(10%)",
  znote =        "Acceptance rate: 24/67 = 36\%",
}
@inproceedings{Hutter:16wirehead,
  author     = {Tom Everitt and Marcus Hutter},
  title      = {Avoiding Wireheading with Value Reinforcement Learning},
  booktitle  = {Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})},
  address    = {New York, USA},
  series     = {LNAI},
  volume     = {9782},
  pages      = {12--22},
  _editor    = {Bas Steunebrink and Pei Wang and Ben Goertzel},
  publisher  = {Springer},
  _month     = jul,
  year       = {2016},
  bibtex     = {http://www.hutter1.net/official/bib.htm#wirehead},
  url        = {http://arxiv.org/abs/1605.03143},
  pdf        = {http://www.hutter1.net/publ/wirehead.pdf},
  latex      = {http://www.hutter1.net/publ/wirehead.tex},
  slides     = {http://www.hutter1.net/publ/swirehead.pdf},
  video      = {http://youtu.be/sqFc2-_mDCk},
  project    = {http://www.hutter1.net/official/projects.htm#safe},
  doi        = {10.1007/978-3-319-41649-6_2},
  issn       = {0302-9743},
  isbn       = {978-3-319-41648-9},
  keywords   = {intelligent agents; reinforcement learning; wireheading; value RL; utility function; safety},
  abstract   = {How can we design good goals for arbitrarily intelligent agents? Reinforcement
    learning (RL) is a natural approach. Unfortunately, RL does not work well for
    generally intelligent agents, as RL agents are incentivised to shortcut the
    reward sensor for maximum reward -- the so-called wireheading problem. In this
    paper we suggest an alternative to RL called value reinforcement learning (VRL).
    In VRL, agents use the reward signal to learn a utility function. The VRL setup
    allows us to remove the incentive to wirehead by placing a constraint on the
    agent's actions. The constraint is defined in terms of the agent's belief
    distributions, and does not require an explicit specification of which actions
    constitute wireheading.},
  support    = {ARC grant DP150104590},
  for        = {080101(70%),220312(30%)},
  seo        = {970108(60%),970122(20%),970117(20%)},
  znote      = {Acceptance rate: 24/67 = 36\%},
}
@inproceedings{Hutter:16selfmod,
  author     = {Tom Everitt and Daniel Filan and Mayank Daswani and Marcus Hutter},
  title      = {Self-Modification of Policy and Utility Function in Rational Agents},
  booktitle  = {Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})},
  address    = {New York, USA},
  series     = {LNAI},
  volume     = {9782},
  pages      = {1--11},
  _editor    = {Bas Steunebrink and Pei Wang and Ben Goertzel},
  publisher  = {Springer},
  _month     = jul,
  year       = {2016},
  bibtex     = {http://www.hutter1.net/official/bib.htm#selfmod},
  url        = {http://arxiv.org/abs/1605.03142},
  pdf        = {http://www.hutter1.net/publ/selfmod.pdf},
  latex      = {http://www.hutter1.net/publ/selfmod.tex},
  video      = {http://youtu.be/sqFc2-_mDCk},
  award      = {http://agi-conf.org/2016/prizes/},
  project    = {http://www.hutter1.net/official/projects.htm#safe},
  doi        = {10.1007/978-3-319-41649-6_1},
  issn       = {0302-9743},
  isbn       = {978-3-319-41648-9},
  keywords   = {intelligent agents; self-modification; goal preservation; utility functions; control problem; safety},
  abstract   = {Any agent that is part of the environment it interacts with and has versatile
    actuators (such as arms and fingers), will in principle have the ability to
    self-modify -- for example by changing its own source code. As we continue to
    create more and more intelligent agents, chances increase that they will learn
    about this ability. The question is: will they want to use it? For example,
    highly intelligent systems may find ways to change their goals to something more
    easily achievable, thereby `escaping' the control of their designers. In an
    important paper, Omohundro (2008) argued that goal preservation is a fundamental
    drive of any intelligent system, since a goal is more likely to be achieved if
    future versions of the agent strive towards the same goal. In this paper, we
    formalise this argument in general reinforcement learning, and explore
    situations where it fails. Our conclusion is that the self-modification
    possibility is harmless if and only if the value function of the agent
    anticipates the consequences of self-modifications and use the current utility
    function when evaluating the future.},
  support    = {ARC grant DP150104590},
  for        = {080101(70%),220312(30%)},
  seo        = {970108(60%),970122(20%),970117(20%)},
  znote      = {Acceptance rate: 24/67 = 36\%},
  note       = {Winner of the Kurzweil Prize for Best AGI Paper},
}
@InProceedings{Hutter:16vacrecog,
  author =       "Basura Fernando and Peter Anderson and Marcus Hutter and Stephen Gould",
  title =        "Discriminative Hierarchical Rank Pooling for Activity Recognition",
  booktitle =    "Proc. IEEE Conference on Computer Vision and Pattern Recognition ({CVPR'16})",
  address =      "Las Vegas, NV, USA",
  pages =        "1924--1932",
  _editor =       "Lourdes Agapito and Tamara Berg and Jana Kosecka and Lihi Zelnik-Manor",
  publisher =    "IEEE",
  _month =        jun,
  year =         "2016",
  bibtex =       "http://www.hutter1.net/official/bib.htm#vacrecog",
  pdf =          "http://www.hutter1.net/publ/vacrecog.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#cvip",
  code =         "http://www.hutter1.net/publ/varcode.zip",
  issn =         "1063-6919",
  doi =          "10.1109/CVPR.2016.212",
  keywords =     "rank pooling; activity classification; hierarchy; video; training;
                  convolutional neural network; nonlinear feature functions",
  abstract =     "We present hierarchical rank pooling, a video sequence encoding
                  method for activity recognition. It consists of a network of rank
                  pooling functions which captures the dynamics of rich convolutional
                  neural network features within a video sequence. By stacking
                  non-linear feature functions and rank pooling over one another, we
                  obtain a high capacity dynamic encoding mechanism, which is used
                  for action recognition. We present a method for jointly learning
                  the video representation and activity classifier parameters.
                  Our method obtains state-of-the-art results on three important
                  activity recognition benchmarks: 76.7\% on Hollywood2,
                  66.9\% on HMDB51, and 91.4\% on UCF101.",
  for =          "080104(50%),080106(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 643/1865 = 30\%",
}
@inproceedings{Hutter:16thompgrl,
  author    = {Jan Leike and Tor Lattimore and Laurent Orseau and Marcus Hutter},
  title     = {Thompson Sampling is Asymptotically Optimal in General Environments},
  booktitle = {Proc. 32nd International Conf. on Uncertainty in Artificial Intelligence ({UAI'16})},
  address   = {New Jersey, USA},
  _editor   = {Alexander Ihler and Dominik Janzing},
  publisher = {AUAI Press},
  pages     = {417--426},
  _month    = jun,
  year      = {2016},
  bibtex    = {http://www.hutter1.net/official/bib.htm#thompgrl},
  http      = {http://auai.org/uai2016/proceedings/papers/20.pdf},
  url       = {http://arxiv.org/abs/1602.07905},
  pdf       = {http://www.hutter1.net/publ/thompgrl.pdf},
  latex     = {http://www.hutter1.net/publ/thompgrl.tex},
  slides    = {http://www.hutter1.net/publ/sthompgrl.pdf},
  award     = {http://auai.org/uai2016/program.php},
  project   = {http://www.hutter1.net/official/projects.htm#uai},
  isbn      = {978-0-9966431-1-5},
  keywords  = {General reinforcement learning; Thompson sampling;
               asymptotic optimality; regret; discounting; recoverability; AIXI},
  abstract  = {We discuss a variant of Thompson sampling for nonparametric
               reinforcement learning in countable classes of general stochastic
               environments. These environments can be non-Markov, nonergodic, and
               partially observable. We show that Thompson sampling learns the
               environment class in the sense that (1) asymptotically its value
               converges to the optimal value in mean and (2) given a
               recoverability assumption regret is sublinear.},
  support   = {ARC grant DP150104590},
  for       = {080101(60%),010404(40%)},
  seo       = {970108(100%)},
  note      = {Best student paper},
  znote     = {Acceptance rate: 26/275 = 9\% (oral!) [85/275 = 31\% incl. poster]},
}
@InProceedings{Hutter:16speedprior,
  author =       "Daniel Filan and Jan Leike and Marcus Hutter",
  title =        "Loss Bounds and Time Complexity for Speed Priors",
  booktitle =    "Proc. 19th International Conf. on Artificial Intelligence and Statistics ({AISTATS'16})",
  address =      "Cadiz, Spain",
  volume =       "51",
  _editor =       "Arthur Gretton and Christian Robert",
  publisher =    "Microtome",
  pages =        "1394--1402",
  _month =        may,
  year =         "2016",
  bibtex =       "http://www.hutter1.net/official/bib.htm#speedprior",
  http =         "http://jmlr.org/proceedings/papers/v51/",
  url =          "http://arxiv.org/abs/1604.03343",
  pdf =          "http://www.hutter1.net/publ/speedprior.pdf",
  latex =        "http://www.hutter1.net/publ/speedprior.tex",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  issn =         "1938-7228",
  keywords =     "universal distribution; speed prior; computational complexity; predictive performance; upper bounds.",
  abstract =     "This paper establishes for the first time the predictive
                  performance of speed priors and their computational complexity. A
                  speed prior is essentially a probability distribution that puts low
                  probability on strings that are not efficiently computable. We
                  propose a variant to the original speed prior (Schmidhuber, 2002),
                  and show that our prior can predict sequences drawn from
                  probability measures that are estimable in polynomial time. Our
                  speed prior is computable in doubly-exponential time, but not in
                  polynomial time. On a polynomial time computable sequence our speed
                  prior is computable in exponential time. We show better upper
                  complexity bounds for Schmidhuber's speed prior under the same
                  conditions, and that it predicts deterministic sequences that are
                  computable in polynomial time; however, we also show that it is not
                  computable in polynomial time, and the question of its predictive
                  properties for stochastic sequences remains open.",
  support =      "ARC grant DP150104590",
  for =          "080101(20%),080201(30%),080401(20%),010404(30%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 165/537 = 31\%",
}

%-------------Publications-of-Marcus-Hutter-2015--------------%

@InProceedings{Hutter:15metasearch1,
  author =       "Tom Everitt and Marcus Hutter",
  title =        "Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art I: {T}ree Search",
  booktitle =    "Proc. 28th Australasian Joint Conference on Artificial Intelligence ({AusAI'15})",
  address =      "Canberra, Australia",
  series =       "LNAI",
  volume =       "9457",
  _editor =       "Bernhard Pfahringer and Jochen Renz",
  publisher =    "Springer",
  pages =        "157--165",
  _month =        dec,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#metasearch1",
  url =          "http://arxiv.org/abs/1509.02709",
  pdf =          "http://www.hutter1.net/publ/metasearch1.pdf",
  slides =       "http://www.hutter1.net/publ/smetasearch.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#search",
  code =         "http://www.hutter1.net/publ/metasearchcode.zip",
  issn =         "0302-9743",
  isbn =         "978-3-319-26349-6",
  doi =          "10.1007/978-3-319-26350-2_14",
  keywords =     "BFS, DFS, Analytical Algorithm Selection, Average runtime, Meta-heuristics,
                  Tree Search, Probabilistic Goal Distribution",
  abstract =     "BFS and DFS are the two most fundamental search algorithms.
                  We derive approximations of their expected runtimes in complete
                  trees, as a function of tree depth and probabilistic goal distribution. We
                  also demonstrate that the analytical approximations are close to the
                  empirical averages for most parameter settings, and that the results can
                  be used to predict the best algorithm given the relevant problem features.",
  for =          "080199(50%),080201(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate (all papers): 57/102 = 56\%",
}
@inproceedings{Hutter:15metasearch2,
  author    = {Tom Everitt and Marcus Hutter},
  title     = {Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art II: {G}raph Search},
  booktitle = {Proc. 28th Australasian Joint Conference on Artificial Intelligence ({AusAI'15})},
  address   = {Canberra, Australia},
  series    = {LNAI},
  volume    = {9457},
  _editor   = {Bernhard Pfahringer and Jochen Renz},
  publisher = {Springer},
  pages     = {166--178},
  _month    = dec,
  year      = {2015},
  bibtex    = {http://www.hutter1.net/official/bib.htm#metasearch2},
  url       = {http://arxiv.org/abs/1509.02709},
  pdf       = {http://www.hutter1.net/publ/metasearch2.pdf},
  slides    = {http://www.hutter1.net/publ/smetasearch.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#search},
  code      = {http://www.hutter1.net/publ/metasearchcode.zip},
  issn      = {0302-9743},
  isbn      = {978-3-319-26349-6},
  doi       = {10.1007/978-3-319-26350-2_15},
  keywords  = {BFS, DFS, Analytical Algorithm Selection, Average runtime, Meta-heuristics,
               Graph Search, Probabilistic Goal Distribution},
  abstract  = {The algorithm selection problem asks to select the best algorithm for
               a given problem. In the companion paper (Everitt and Hutter, AusAI, 2015),
               expected BFS and DFS tree search runtime was approximated
               as a function of tree depth and probabilistic goal distribution. Here we
               provide an analogous analysis of BFS and DFS graph search, deriving
               expected runtime as a function of graph structure and goal distribution.
               The applicability of the method is demonstrated through analysis of two
               different grammar problems. The approximations come surprisingly close
               to empirical reality.},
  for       = {080199(50%),080201(50%)},
  seo       = {970108(100%)},
  znote     = {Acceptance rate (full papers): 39/102 = 38\%},
}
@InProceedings{Hutter:15sikscplex,
  author =       "Jan Leike and Marcus Hutter",
  title =        "On the Computability of {S}olomonoff Induction and Knowledge-Seeking",
  booktitle =    "Proc. 26th International Conf. on Algorithmic Learning Theory ({ALT'15})",
  address =      "Banff, Canada",
  series =       "LNAI",
  volume =       "9355",
  _editor =       "Kamalika Chaudhuri and Claudio Gentile and Sandra Zilles",
  publisher =    "Springer",
  pages =        "364--378",
  _month =        oct,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#sikscplex",
  url =          "http://arxiv.org/abs/1507.04124",
  pdf =          "http://www.hutter1.net/publ/sikscplex.pdf",
  slides =       "http://www.hutter1.net/publ/ssikscplex.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  issn =         "0302-9743",
  isbn =         "978-3-319-24485-3",
  doi =          "10.1007/978-3-319-24486-0_24",
  keywords =     "Solomonoff induction; Exploration; Knowledge-seeking agents;
                  General reinforcement learning; Asymptotic optimality; Computability;
                  Complexity; Arithmetical hierarchy; Universal Turing machine; AIXI; BayesExp",
  abstract =     "Solomonoff induction is held as a gold standard for learning,
                  but it is known to be incomputable. We quantify its incomputability
                  by placing various flavors of Solomonoff's prior M in the arithmetical
                  hierarchy. We also derive computability bounds for knowledge-seeking
                  agents, and give a limit-computable weakly asymptotically optimal reinforcement
                  learning agent.",
  support =      "ARC grant DP150104590",
  for =          "080101(50%),080201(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 23/46 = 50\%",
}
@InProceedings{Hutter:15solraven,
  author =       "Jan Leike and Marcus Hutter",
  title =        "Solomonoff Induction Violates {N}icod's Criterion",
  booktitle =    "Proc. 26th International Conf. on Algorithmic Learning Theory ({ALT'15})",
  address =      "Banff, Canada",
  series =       "LNAI",
  volume =       "9355",
  _editor =       "Kamalika Chaudhuri and Claudio Gentile and Sandra Zilles",
  publisher =    "Springer",
  pages =        "349--363",
  _month =        oct,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#solraven",
  url =          "http://arxiv.org/abs/1507.04121",
  pdf =          "http://www.hutter1.net/publ/solraven.pdf",
  slides =       "http://www.hutter1.net/publ/ssolraven.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  code =         "http://www.hutter1.net/publ/solraven.cpp",
  issn =         "0302-9743",
  isbn =         "978-3-319-24485-3",
  doi =          "10.1007/978-3-319-24486-0_23",
  keywords =     "Bayesian reasoning; Confirmation; Disconfirmation; Hempel's paradox;
                  Equivalence condition; Solomonoff normalization",
  abstract =     "Nicod's criterion states that observing a black raven is evidence
                  for the hypothesis H that all ravens are black. We show that
                  Solomonoff induction does not satisfy Nicod's criterion: there are
                  time steps in which observing black ravens decreases the belief in
                  H. Moreover, while observing any computable infinite string
                  compatible with H, the belief in H decreases infinitely often when
                  using the unnormalized Solomonoff prior, but only finitely often
                  when using the normalized Solomonoff prior. We argue that the fault
                  is not with Solomonoff induction; instead we should reject Nicod's
                  criterion.",
  note =         "Also presented at CCR: http://math.uni-heidelberg.de/logic/conferences/ccr2015/",
  support =      "ARC grant DP150104590",
  for =          "080199(50%),220399(50%)",
  seo =          "970108(50%),970122(50%)",
  znote =        "Acceptance rate: 23/46 = 50\%",
}
@InProceedings{Hutter:15seqdts,
  author =       "Tom Everitt and Jan Leike and Marcus Hutter",
  title =        "Sequential Extensions of Causal and Evidential Decision Theory",
  booktitle =    "Proc. 4th International Conf. on Algorithmic Decision Theory ({ADT'15})",
  address =      "Lexington, USA",
  series =       "LNAI",
  volume =       "9346",
  _editor =       "Toby Walsh",
  publisher =    "Springer",
  pages =        "205--221",
  _month =        sep,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#seqdts",
  url =          "http://arxiv.org/abs/1506.07359",
  pdf =          "http://www.hutter1.net/publ/seqdts.pdf",
  slides =       "http://www.hutter1.net/publ/sseqdts.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#universal",
  issn =         "0302-9743",
  isbn =         "978-3-319-23113-6",
  doi =          "10.1007/978-3-319-23114-3_13",
  keywords =     "Evidential decision theory; Causal decision theory;
                  Planning; Causal graphical models; Dualism; Physicalism",
  abstract =     "Moving beyond the dualistic view in AI where agent and environment
                  are separated incurs new challenges for decision making, as
                  calculation of expected utility is no longer straightforward. The
                  non-dualistic decision theory literature is split between causal
                  decision theory and evidential decision theory. We extend these
                  decision algorithms to the sequential setting where the agent
                  alternates between taking actions and observing their consequences.
                  We find that evidential decision theory has two natural extensions
                  while causal decision theory only has one.",
  support =      "ARC grant DP150104590",
  for =          "080101(50%),220302(50%)",
  seo =          "970108(50%),970122(50%)",
  znote =        "Acceptance rate: 32/70 = 45\%",
}
@Article{Hutter:15ratagentx,
  author =       "Peter Sunehag and Marcus Hutter",
  title =        "Rationality, Optimism and Guarantees in General Reinforcement Learning",
  journal =      "Journal of Machine Learning Research",
  volume =       "16",
  pages =        "1345--1390",
  publisher =    "Microtome",
  _address =     "Princeton, NJ, USA",
  _month =        aug,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ratagentx",
  url =          "http://jmlr.org/papers/v16/sunehag15a.html",
  pdf =          "http://www.hutter1.net/publ/ratagentx.pdf",
  slides =       "http://www.hutter1.net/publ/sagproblaws.pdf",
  slides =       "http://www.hutter1.net/publ/soptcog.pdf",
  slides =       "http://www.hutter1.net/publ/sagscilaws.pdf",
  slides =       "http://www.hutter1.net/publ/saixiopt.pdf",
  slides =       "http://www.hutter1.net/publ/soptopt.pdf",
  slides =       "http://www.hutter1.net/publ/saixiaxiom.pdf",
  slides =       "http://www.hutter1.net/publ/saixiaxiom2.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  issn =         "1532-4435",
  keywords =     "Reinforcement Learning, Rationality, Optimism, Optimality, Error bounds",
  abstract =     "In this article, we present a top-down theoretical study of general
                  reinforcement learning agents. We begin with rational agents with
                  unlimited resources and then move to a setting where an agent can
                  only maintain a limited number of hypotheses and optimizes plans
                  over a horizon much shorter than what the agent designer actually
                  wants. We axiomatize what is rational in such a setting in a manner
                  that enables optimism, which is important to achieve systematic
                  explorative behavior. Then, within the class of agents deemed
                  rational, we achieve convergence and finite-error bounds. Such
                  results are desirable since they imply that the agent learns well
                  from its experiences, but the bounds do not directly guarantee good
                  performance and can be achieved by agents doing things one should
                  obviously not. Good performance cannot in fact be guaranteed for
                  any agent in fully general settings. Our approach is to design
                  agents that learn well from experience and act rationally. We
                  introduce a framework for general reinforcement learning agents
                  based on rationality axioms for a decision function and an
                  hypothesis-generating function designed so as to achieve guarantees
                  on the number of errors. We will consistently use an optimistic
                  decision function but the hypothesis-generating function needs to
                  change depending on what is known/assumed. We investigate a number
                  of natural situations having either a frequentist or Bayesian flavor,
                  deterministic or stochastic environments and either finite or
                  countable hypothesis class. Further, to achieve sufficiently good
                  bounds as to hold promise for practical success we introduce a
                  notion of a class of environments being generated by a set of laws.
                  None of the above has previously been done for fully general
                  reinforcement learning environments.",
  support =      "ARC grant DP120100950",
  for =          "080101(60%),010404(30%),220302(10%)",
  seo =          "970108(90%),970122(10%)",
}
@Article{Hutter:15mnonconvx,
  author =       "Tor Lattimore and Marcus Hutter",
  title =        "On {M}artin-L{\"o}f (Non)Convergence of {S}olomonoff's Universal Mixture",
  journal =      "Theoretical Computer Science",
  volume =       "588",
  pages =        "2--15",
  publisher =    "Elsevier",
  _month =        jul,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#mnonconvx",
  pdf =          "http://www.hutter1.net/publ/mnonconvx.pdf",
  slides =       "http://www.hutter1.net/publ/smnonconv.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  issn =         "0304-3975",
  doi =          "10.1016/j.tcs.2014.12.004",
  keywords =     "Solomonoff induction, Kolmogorov complexity, theory of computation.",
  abstract =     "We study the convergence of Solomonoff's universal mixture on
                  individual Martin-L{\"o}f random sequences. A new result is presented
                  extending the work of Hutter and Muchnik [3] by showing that there
                  does not exist a universal mixture that converges on all Martin-L{\"o}f
                  random sequences. We show that this is not an artifact of the fact
                  that the universal mixture is not a proper measure and that the
                  normalised universal mixture also fails to converge on all
                  Martin-L{\"o}f random sequences.",
  for =          "080401(50%),010404(30%),010405(20%)",
  seo =          "970101(30%),970108(70%)",
}
@inproceedings{Hutter:15learncnf,
  author    = {Joel Veness and Marcus Hutter and Laurent Orseau and Marc Bellemare},
  title     = {Online Learning of {k-CNF} Boolean Functions},
  booktitle = {Proc. 24th International Joint Conf. on Artificial Intelligence ({IJCAI'15})},
  address   = {Buenos Aires, Argentina},
  _editor   = {Qiang Yang and Michael Wooldridge},
  publisher = {AAAI Press},
  pages     = {3865--3873},
  _month    = jul,
  year      = {2015},
  bibtex    = {http://www.hutter1.net/official/bib.htm#learncnf},
  url       = {http://arxiv.org/abs/1403.6863},
  pdf       = {http://www.hutter1.net/publ/learncnf.pdf},
  slides    = {http://www.hutter1.net/publ/slearncnf.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#bayes},
  isbn      = {978-1-57735-738-4},
  keywords  = {k-CNF, Online Learning, Logarithmic Loss, Bayesian algorithm},
  abstract  = {This paper revisits the problem of learning a k-CNF Boolean function
               from examples, for fixed k, in the context of online learning under
               the logarithmic loss. We give a Bayesian interpretation to one of
               Valiant's classic PAC learning algorithms, which we then build upon
               to derive three efficient, online, probabilistic, supervised
               learning algorithms for predicting the output of an unknown k-CNF
               Boolean function. We analyze the loss of our methods, and show that
               the cumulative log-loss can be upper bounded by a polynomial
               function of the size of each example.},
  support   = {ARC grant DP150104590},
  for       = {080101(30%),010404(30%),080201(40%)},
  seo       = {970108(100%)},
  znote     = {Acceptance rate: 572/1996 = 29\%},
}
@inproceedings{Hutter:15agproblaws,
  author    = {Peter Sunehag and Marcus Hutter},
  title     = {Using Localization and Factorization to Reduce the Complexity of Reinforcement Learning},
  booktitle = {Proc. 8th Conf. on Artificial General Intelligence ({AGI'15})},
  address   = {Berlin, Germany},
  series    = {LNAI},
  volume    = {9205},
  pages     = {177--186},
  _editor   = {Jordi Bieger and Ben Goertzel and Alexey Potapov},
  publisher = {Springer},
  _month    = jul,
  year      = {2015},
  bibtex    = {http://www.hutter1.net/official/bib.htm#agproblaws},
  pdf       = {http://www.hutter1.net/publ/agproblaws.pdf},
  slides    = {http://www.hutter1.net/publ/sagproblaws.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#uai},
  doi       = {10.1007/978-3-319-21365-1_19},
  issn      = {0302-9743},
  isbn      = {978-3-319-21364-4},
  keywords  = {reinforcement learning; laws; optimism; bounds},
  abstract  = {General reinforcement learning is a powerful framework for
               artificial intelligence that has seen much theoretical progress since introduced
               fifteen years ago. We have previously provided guarantees for
               cases with finitely many possible environments. Though the results are
               the best possible in general, a linear dependence on the size of the hypothesis
               class renders them impractical. However, we dramatically improved
               on these by introducing the concept of environments generated
               by combining laws. The bounds are then linear in the number of laws
               needed to generate the environment class. This number is identified as a
               natural complexity measure for classes of environments. The individual
               law might only predict some feature (factorization) and only in some
               contexts (localization). We here extend previous deterministic results to
               the important stochastic setting.},
  support   = {ARC grant DP120100950},
  for       = {080101(100%)},
  seo       = {970108(80%),970122(20%)},
  znote     = {Acceptance rate: 41/72 = 57\%.},
}
@InProceedings{Hutter:15aixicplex,
  author =       "Jan Leike and Marcus Hutter",
  title =        "On the Computability of {AIXI}",
  booktitle =    "Proc. 31st International Conf. on Uncertainty in Artificial Intelligence ({UAI'15})",
  address =      "Amsterdam, Netherlands",
  _editor =       "Marina Meila and Tom Heskes",
  publisher =    "AUAI Press",
  pages =        "464--473",
  _month =        jul,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixicplex",
  url =          "http://arxiv.org/abs/1510.05572",
  pdf =          "http://www.hutter1.net/publ/aixicplex.pdf",
  slides =       "http://www.hutter1.net/publ/saixicplex.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  isbn =         "978-0-9966431-0-8",
  keywords =     "AIXI; Solomonoff induction; general reinforcement learning;
                  computability; complexity; arithmetical hierarchy; universal Turing machine.",
  abstract =     "How could we solve the machine learning and the artificial
                  intelligence problem if we had infinite computation? Solomonoff
                  induction and the reinforcement learning agent AIXI are proposed
                  answers to this question. Both are known to be incomputable. In
                  this paper, we quantify this using the arithmetical hierarchy, and
                  prove upper and corresponding lower bounds for incomputability. We
                  show that AIXI is not limit computable, thus it cannot be
                  approximated using finite computation. Our main result is a
                  limit-computable epsilon-optimal version of AIXI with infinite horizon
                  that maximizes expected rewards.",
  support =      "ARC grant DP150104590",
  for =          "080101(50%),080201(50%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 99/291 = 34\%",
}
@Article{Hutter:15aixiprior,
  author =       "Jan Leike and Marcus Hutter",
  title =        "Bad Universal Priors and Notions of Optimality",
  journal =      "Journal of Machine Learning Research, W\&CP: COLT",
  volume =       "40",
  pages =        "1244--1259",
  _editor =       "Peter Gr{\"u}nwald and Elad Hazan",
  publisher =    "",
  _address =     "Princeton, NJ, USA",
  _month =        jul,
  year =         "2015",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixiprior",
  http =         "http://jmlr.org/proceedings/papers/v40/Leike15.html",
  url =          "http://arxiv.org/abs/1510.04931",
  pdf =          "http://www.hutter1.net/publ/aixiprior.pdf",
  slides =       "http://www.hutter1.net/publ/saixiprior.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  issn =         "1532-4435",
  keywords =     "AIXI, general reinforcement learning, universal Turing machine,
                  Legg-Hutter intelligence, balanced Pareto optimality, asymptotic optimality.",
  abstract =     "A big open question of algorithmic information theory is the choice
                  of the universal Turing machine (UTM). For Kolmogorov complexity
                  and Solomonoff induction we have invariance theorems: the choice of
                  the UTM changes bounds only by a constant. For the universally
                  intelligent agent AIXI (Hutter, 2005) no invariance theorem is
                  known. Our results are entirely negative: we discuss cases in which
                  unlucky or adversarial choices of the UTM cause AIXI to misbehave
                  drastically. We show that Legg-Hutter intelligence and thus
                  balanced Pareto optimality is entirely subjective, and that every
                  policy is Pareto optimal in the class of all computable
                  environments. This undermines all existing optimality properties
                  for AIXI. While it may still serve as a gold standard for AI, our
                  results imply that AIXI is a relative theory, dependent on the
                  choice of the UTM.",
  note =         "Also presented at EWRL'15.
                  http://ewrl.files.wordpress.com/2015/02/ewrl12\_2015\_submission\_3.pdf",
  support =      "ARC grant DP150104590",
  for =          "080101(80%),080401(20%)",
  seo =          "970108(100%)",
  znote =        "28th Annual Conf. on Learning Theory. Acceptance rate: 27/176 = 15\%",
}
@InCollection{Hutter:15aitcog,
  author =       "Peter Sunehag and Marcus Hutter",
  title =        "Algorithmic Complexity",
  booktitle =    "International Encyclopedia of the Social \& Behavioral Sciences",
  volume =       "1",
  pages =        "534--538",
  editor =       "James D. Wright",
  publisher =    "Elsevier",
  _month =        apr,
  year =         "2015",
  edition =      "Second",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aitcog",
  pdf =          "http://www.hutter1.net/publ/aitcog.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  isbn =         "978-0-08-097086-8",
  doi =          "10.1016/B978-0-08-097086-8.43001-1",
  keywords =     "Kolmogorov Complexity, Algorithmic Information Theory, Cognition,
                  Rationality, Simplicity, Optimism, Induction, Similarity, Clustering,
                  Prediction, Agents, Learning, Reinforcement",
  abstract =     "Algorithmic complexity provides a mathematical formal notion of
                  string complexity. Building on this, one arrives at mathematical
                  `gold standard' (though incomputable) definitions of randomness,
                  induction, similarity, and even intelligence. These definitions can
                  be turned into practical algorithms by using common compressors to
                  approximate the universal solutions. One can consider the theories
                  as idealized cognition with respect to which one can aim to
                  describe actual biological cognition by listing biases and
                  limitations that need to be defined relative to some normative
                  reference.",
  support =      "ARC grant DP120100950",
  for =          "170203(50%),080401(50%)",
  seo =          "970117(100%)",
}
@inproceedings{Hutter:15cnc,
  author       = {Joel Veness and Marc Bellemare and Marcus Hutter and Alvin Chua and Guillaume Desjardins},
  title        = {Compress and Control},
  booktitle    = {Proc. 29th {AAAI} Conference on Artificial Intelligence ({AAAI'15})},
  address      = {Austin, USA},
  pages        = {3016--3023},
  _editor      = {Blai Bonet and Sven Koenig},
  publisher    = {AAAI Press},
  _month       = jan,
  year         = 2015,
  bibtex       = {http://www.hutter1.net/official/bib.htm#cnc},
  url          = {http://arxiv.org/abs/1411.5326},
  pdf          = {http://www.hutter1.net/publ/cnc.pdf},
  slides       = {http://www.hutter1.net/publ/scnc.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  issn         = {2159-5399},
  isbn         = {978-1-57735-698-1},
  keywords     = {reinforcement learning, compression, Q-value function, policy evaluation,
                  density estimation, on-policy control, Pong, Freeway, Q*Bert},
  abstract     = {This paper describes a new information-theoretic policy evaluation
                  technique for reinforcement learning. This technique converts any
                  compression or density model into a corresponding estimate of
                  value. Under appropriate stationarity and ergodicity conditions, we
                  show that the use of a sufficiently powerful model gives rise to a
                  consistent value function estimator. We also study the behavior of
                  this technique when applied to various Atari 2600 video games,
                  where the use of suboptimal modeling techniques is unavoidable. We
                  consider three fundamentally different models, all too limited to
                  perfectly model the dynamics of the system. Remarkably, we find
                  that our technique provides sufficiently accurate value estimates
                  for effective on-policy control. We conclude with a suggestive
                  study highlighting the potential of our technique to scale to large
                  problems.},
  for          = {080101(100%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 531/1991 = 27\%. Oral 200?/1991=10\%},
}

%-------------Publications-of-Marcus-Hutter-2014--------------%

  @techreport{Hutter:14cbayeskl,
  author       = {Tor Lattimore and Marcus Hutter},
  title        = {Asymptotics of Continuous Bayes for Non-i.i.d. Sources},
  pages        = {1--16},
  _month       = nov,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#cbayeskl},
  url          = {http://arxiv.org/abs/1411.2918},
  pdf          = {http://www.hutter1.net/publ/cbayeskl.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#bayes},
  keywords     = {entropy; stochastic process; Bayes; non-stationary; dependence; sequence prediction; compression},
  abstract     = {Clarke and Barron analysed the relative entropy between an
                  i.i.d. source and a Bayesian mixture over a continuous class
                  containing that source. In this paper a comparable result is
                  obtained when the source is permitted to be both non-stationary
                  and dependent. The main theorem shows that Bayesian methods
                  perform well for both compression and sequence prediction even
                  in this most general setting with only mild technical
                  assumptions.},
}
@InProceedings{Hutter:14rladvice,
  author =       "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title =        "Reinforcement Learning with Value Advice",
  booktitle =    "Proc. 6th Asian Conf. on Machine Learning ({ACML'14})",
  volume =       "39",
  pages =        "299--314",
  _editor =       "Dinh Phung and Hang Li",
  publisher =    "JMLR",
  address =      "Canberra, Australia",
  _month =        nov,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#rladvice",
  http =         "http://jmlr.org/proceedings/papers/v39/daswani14.pdf",
  pdf =          "http://www.hutter1.net/publ/rladvice.pdf",
  slides =       "http://www.hutter1.net/publ/srladvice.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  issn =         "1532-4435",
  keywords =     "feature reinforcement learning; imitation learning; dataset aggregation;
                  value advice; upper confidence tree; Monte Carlo search; Arcade learning environment.",
  abstract =     "The problem we consider in this paper is reinforcement learning
                  with value advice. In this setting, the agent is given limited
                  access to an oracle that can tell it the expected return (value) of
                  any state-action pair with respect to the optimal policy. The agent
                  must use this value to learn an explicit policy that performs well
                  in the environment. We provide an algorithm called RLAdvice, based
                  on the imitation learning algorithm DAgger. We illustrate the
                  effectiveness of this method in the Arcade Learning Environment on
                  three different games, using value estimates from UCT as advice.",
  support =      "ARC grant DP120100950",
  for =          "080101(100%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 25/80 = 31\%",
}
@inproceedings{Hutter:14reflect,
  author       = {Di Yang and Srimal Jayawardena and Stephen Gould and Marcus Hutter},
  title        = {Reflective Features Detection and Hierarchical Reflections Separation in Image Sequences},
  booktitle    = {Proc. 16th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'14})},
  pages        = {1--7},
  _editor      = {S.L. Phung and A. Bouzerdoum and P. Ogunbona and W. Li and L. Wang},
  publisher    = {IEEE Xplore},
  address      = {Wollongong, Australia},
  _month       = nov,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#reflect},
  pdf          = {http://www.hutter1.net/publ/reflect.pdf},
  slides       = {http://www.hutter1.net/publ/sreflect.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#icar},
  doi          = {10.1109/DICTA.2014.7008127},
  isbn         = {978-1-4799-5409-4},
  keywords     = {computer vision; reflection detection; support vector machine; automatic.},
  abstract     = {Computer vision techniques such as Structure-from-Motion (SfM) and
                  object recognition tend to fail on scenes with highly reflective
                  objects because the reflections behave differently to the true
                  geometry of the scene. Such image sequences may be treated as two
                  layers superimposed over each other - the nonreflection scene
                  source layer and the reflection layer. However, decomposing the two
                  layers is a very challenging task as it is ill-posed and common
                  methods rely on prior information. This work presents an automated
                  technique for detecting reflective features with a comprehensive
                  analysis of the intrinsic, spatial, and temporal properties of
                  feature points. A support vector machine (SVM) is proposed to learn
                  reflection feature points. Predicted reflection feature points are
                  used as priors to guide the reflection layer separation. This gives
                  more robust and reliable results than what is achieved by
                  performing layer separation alone.},
  support      = {ControlExpert GmbH},
  for          = {080104(50%),080106(50%)},
}
@Article{Hutter:14pacmdpx,
  author =       "Tor Lattimore and Marcus Hutter",
  title =        "Near-Optimal {PAC} Bounds for Discounted {MDP}s",
  journal =      "Theoretical Computer Science",
  volume =       "558",
  pages =        "125--143",
  publisher =    "Elsevier",
  _month =        nov,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#pacmdpx",
  pdf =          "http://www.hutter1.net/publ/pacmdpx.pdf",
  slides =       "http://www.hutter1.net/publ/spacmdp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#agents",
  issn =         "0304-3975",
  doi =          "10.1016/j.tcs.2014.09.029",
  keywords =     "Sample-complexity; PAC bounds; Markov decision processes; Reinforcement learning",
  abstract =     "We study upper and lower bounds on the sample-complexity of
                  learning near-optimal behaviour in finite-state discounted Markov
                  Decision Processes (MDPs). We prove a new bound for a modified
                  version of Upper Confidence Reinforcement Learning (UCRL) with only
                  cubic dependence on the horizon. The bound is unimprovable in all
                  parameters except the size of the state/action space, where it
                  depends linearly on the number of non-zero transition
                  probabilities. The lower bound strengthens previous work by being
                  both more general (it applies to all policies) and tighter. The
                  upper and lower bounds match up to logarithmic factors provided the
                  transition matrix is not too dense.",
  support =      "ARC grant DP120100950",
  for =          "010404(30%),010405(30%),080198(40%)",
  seo =          "970108(100%)",
}
@inproceedings{Hutter:14ktoptdif,
  author       = {Tansu Alpcan and Tom Everitt and Marcus Hutter},
  title        = {Can we Measure the Difficulty of an Optimization Problem?},
  booktitle    = {{IEEE} Information Theory Workshop},
  pages        = {356--360},
  _editor      = {Yi Hong and Jamie Evans and Emanuele Viterbo and Urbashi Mitra},
  publisher    = {IEEE Press},
  address      = {Hobart, Australia},
  _month       = nov,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#ktoptdif},
  pdf          = {http://www.hutter1.net/publ/ktoptdif.pdf},
  slides       = {http://www.hutter1.net/publ/sktoptdif.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  issn         = {1662-9019},
  isbn         = {978-1-4799-5998-0},
  doi          = {10.1109/ITW.2014.6970853},
  keywords     = {open box optimization; problem complexity; algorithmic information theory},
  abstract     = {Can we measure the difficulty of an optimization
                  problem? Although optimization plays a crucial role in modern
                  science and technology, a formal framework that puts problems
                  and solution algorithms into a broader context has not been
                  established. This paper presents a conceptual approach which
                  gives a positive answer to the question for a broad class of
                  optimization problems. Adopting an information and computational
                  perspective, the proposed framework builds upon Shannon and
                  algorithmic information theories. As a starting point, a concrete
                  model and definition of optimization problems is provided. Then,
                  a formal definition of optimization difficulty is introduced which
                  builds upon algorithmic information theory. Following an initial
                  analysis, lower and upper bounds on optimization difficulty
                  are established. One of the upper-bounds is closely related to
                  Shannon information theory and black-box optimization. Finally,
                  various computational issues and future research directions are
                  discussed.},
  for          = {080401(70%),080198(30%)},
  seo          = {970801(100%)},
}
@InProceedings{Hutter:14epipolar,
  author =       "Srimal Jayawardena and Stephen Gould and Hongdong Li and Marcus Hutter and Richard Hartley",
  title =        "Reliable Point Correspondences in Scenes Dominated by Highly Reflective and Largely Homogeneous Surfaces",
  booktitle =    "Proc. 12th Asian Conf. on Computer Vision -- Workshop ({RoLoD@ACCV'14}) Part I",
  address =      "Singapore",
  series =       "LNCS",
  volume =       "9008",
  pages =        "659--674",
  _editor =       "C. V. Jawahar and Shiguang Shan",
  publisher =    "Springer",
  _month =        nov,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#epipolar",
  pdf =          "http://www.hutter1.net/publ/epipolar.pdf",
  slides =       "http://www.hutter1.net/publ/sepipolar.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#icar",
  issn =         "0302-9743",
  isbn =         "978-3-319-16627-8",
  doi =          "10.1007/978-3-319-16628-5_47",
  keywords =     "point correspondences; reflections; homogeneous; texture impoverished;
                  epipolar geometry; fundamental matrix; structure from motion; noisy.",
  abstract =     "Common Structure from Motion (SfM) tasks require reliable point
                  correspondences in images taken from different views to
                  subsequently estimate model parameters which describe the 3D scene
                  geometry. For example when estimating the fundamental matrix from
                  point correspondences using RANSAC. The amount of noise in the
                  point correspondences drastically affect the estimation algorithm
                  and the number of iterations needed for convergence grows
                  exponentially with the level of noise. In scenes dominated by
                  highly reflective and largely homogeneous surfaces such as vehicle
                  panels and buildings with a lot of glass, existing approaches give
                  a very high proportion of spurious point correspondences. As a
                  result the number of iterations required for subsequent model
                  estimation algorithms become intractable. We propose a novel method
                  that uses descriptors evaluated along points in image edges to
                  obtain a sufficiently high proportion of correct point
                  correspondences. We show experimentally that our method gives
                  better results in recovering the epipolar geometry in scenes
                  dominated by highly reflective and homogeneous surfaces compared to
                  common baseline methods on stereo images taken from considerably
                  wide baselines.",
  support =      "ControlExpert GmbH",
  for =          "080106(100%)",
  seo =          "970108(80%),890205(20%)",
  znote =        "Acceptance rate: 153/307 = 50\%",
}
@InProceedings{Hutter:14martosc,
  author =       "Jan Leike and Marcus Hutter",
  title =        "Indefinitely Oscillating Martingales",
  booktitle =    "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
  address =      "Bled, Slovenia",
  series =       "LNAI",
  volume =       "8776",
  _editor =       "Peter Auer and Alexander Clark",
  publisher =    "Springer",
  pages =        "321--335",
  _month =        oct,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#martosc",
  url =          "http://arxiv.org/abs/1408.3169",
  pdf =          "http://www.hutter1.net/publ/martosc.pdf",
  latex =        "http://www.hutter1.net/publ/martosc.tex",
  slides =       "http://www.hutter1.net/publ/smartosc.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#bayes",
  issn =         "0302-9743",
  isbn =         "978-3-319-11661-7",
  doi =          "10.1007/978-3-319-11662-4_23",
  keywords =     "martingales, infinite oscillations, bounds, convergence
                  rates, minimum description length, mind changes.",
  abstract =     "We construct a class of nonnegative martingale processes
                  that oscillate indefinitely with high probability. For these processes,
                  we state a uniform rate of the number of oscillations for a given magnitude
                  and show that this rate is asymptotically close to the theoretical upper
                  bound. These bounds on probability and expectation of the number of
                  upcrossings are compared to classical bounds from the martingale literature.
                  We discuss two applications. First, our results imply that the
                  limit of the minimum description length operator may not exist. Second,
                  we give bounds on how often one can change one's belief in a given
                  hypothesis when observing a stream of data.",
  for =          "010405(60%),080101(40%)",
  seo =          "970101(60%),970108(40%)",
  znote =        "Acceptance rate: 21/50 = 42\%",
}
@inproceedings{Hutter:14off2on,
  author       = {Marcus Hutter},
  title        = {Offline to Online Conversion},
  booktitle    = {Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})},
  address      = {Bled, Slovenia},
  series       = {LNAI},
  volume       = 8776,
  _editor      = {Peter Auer and Alexander Clark},
  publisher    = {Springer},
  pages        = {230--244},
  _month       = oct,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#off2on},
  url          = {http://arxiv.org/abs/1407.3334},
  pdf          = {http://www.hutter1.net/publ/off2on.pdf},
  latex        = {http://www.hutter1.net/publ/off2on.tex},
  slides       = {http://www.hutter1.net/publ/soff2on.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#infoth},
  issn         = {0302-9743},
  isbn         = {978-3-319-11661-7},
  doi          = {10.1007/978-3-319-11662-4_17},
  keywords     = {offline; online; batch; sequential; probability; estimation;
                  prediction; time-consistency; normalization; tractable; regret;
                  combinatorics; Bayes; Laplace; Ristad; Good-Turing.},
  abstract     = {We consider the problem of converting offline estimators into an
                  online predictor or estimator with small extra regret. Formally
                  this is the problem of merging a collection of probability
                  measures over strings of length 1,2,3,... into a single
                  probability measure over infinite sequences. We describe various
                  approaches and their pros and cons on various examples. As a
                  side-result we give an elementary non-heuristic purely
                  combinatoric derivation of Turing's famous estimator. Our main
                  technical contribution is to determine the computational
                  complexity of online estimators with good guarantees in general.},
  for          = {080401(30%),080201(30%),010405(40%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 21/50 = 42\%},
}
@inproceedings{Hutter:14exsagg,
  author       = {Marcus Hutter},
  title        = {Extreme State Aggregation beyond {MDP}s},
  booktitle    = {Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})},
  address      = {Bled, Slovenia},
  series       = {LNAI},
  volume       = 8776,
  _editor      = {Peter Auer and Alexander Clark},
  publisher    = {Springer},
  pages        = {185--199},
  _month       = oct,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#exsagg},
  url          = {http://arxiv.org/abs/1407.3341},
  pdf          = {http://www.hutter1.net/publ/exsagg.pdf},
  latex        = {http://www.hutter1.net/publ/exsagg.tex},
  slides       = {http://www.hutter1.net/publ/sexsagg.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#frl},
  issn         = {0302-9743},
  isbn         = {978-3-319-11661-7},
  doi          = {10.1007/978-3-319-11662-4_14},
  keywords     = {state aggregation, reinforcement learning, non-MDP.},
  abstract     = {We consider a Reinforcement Learning setup without any (esp.\ MDP)
                  assumptions on the environment. State aggregation and more
                  generally feature reinforcement learning is concerned with mapping
                  histories/raw-states to reduced/aggregated states. The idea behind
                  both is that the resulting reduced process (approximately) forms a
                  small stationary finite-state MDP, which can then be efficiently
                  solved or learnt. We considerably generalize existing aggregation
                  results by showing that even if the reduced process is not an MDP,
                  the (q-)value functions and (optimal) policies of an associated MDP
                  with same state-space size solve the original problem, as long as
                  the solution can approximately be represented as a function of the
                  reduced states. This implies an upper bound on the required state
                  space size that holds uniformly for all RL problems. It may also
                  explain why RL algorithms designed for MDPs sometimes perform well
                  beyond MDPs.},
  support      = {ARC grant DP120100950},
  for          = {080101(100%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 21/50 = 42\%},
}
% NOTE(review): the `for' split in the entry below sums to 120% (80% + 40%); confirm the intended percentages.
@inproceedings{Hutter:14pacbayes,
  author       = {Tor Lattimore and Marcus Hutter},
  title        = {Bayesian Reinforcement Learning with Exploration},
  booktitle    = {Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})},
  address      = {Bled, Slovenia},
  series       = {LNAI},
  volume       = 8776,
  _editor      = {Peter Auer and Alexander Clark},
  publisher    = {Springer},
  pages        = {170--184},
  _month       = oct,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#pacbayes},
  pdf          = {http://www.hutter1.net/publ/pacbayes.pdf},
  slides       = {http://www.hutter1.net/publ/spacbayes.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#rl},
  issn         = {0302-9743},
  isbn         = {978-3-319-11661-7},
  doi          = {10.1007/978-3-319-11662-4_13},
  keywords     = {reinforcement learning; sample complexity; Bayes-optimal; exploration; PAC bounds},
  abstract     = {We consider a general reinforcement learning problem and show that
                  carefully combining the Bayesian optimal policy and an exploring
                  policy leads to minimax sample-complexity bounds in a very general
                  class of (history-based) environments. We also prove lower bounds
                  and show that the new algorithm displays adaptive behaviour when
                  the environment is easier than worst-case.},
  support      = {ARC grant DP120100950},
  for          = {080101(80%),010404(40%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 21/50 = 42\%},
}
@InProceedings{Hutter:14learnutm,
  author =       "Peter Sunehag and Marcus Hutter",
  title =        "Intelligence as Inference or Forcing {Occam} on the World",
  booktitle =    "Proc. 7th Conf. on Artificial General Intelligence ({AGI'14})",
  series =       "LNAI",
  volume =       "8598",
  pages =        "186--195",
  _editor =       "Ben Goertzel and Laurent Orseau and Javier Snaider",
  publisher =    "Springer",
  address =      "Quebec City, Canada",
  _month =        aug,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#learnutm",
  pdf =          "http://www.hutter1.net/publ/learnutm.pdf",
  slides =       "http://www.hutter1.net/publ/slearnutm.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  doi =          "10.1007/978-3-319-09274-4_18",
  issn =         "0302-9743",
  isbn =         "978-3-319-09273-7",
  keywords =     "Ockham; Universal; Intelligence; Learning; Turing Machine;
                  Expectation Maximization; Evolution; Reasoning; Agents; Reward.",
  abstract =     "We propose to perform the optimization task of Universal Artificial
                  Intelligence (UAI) through learning a reference machine on which
                  good programs are short. Further, we also acknowledge that the
                  choice of reference machine that the UAI objective is based on is
                  arbitrary and, therefore, we learn a suitable machine for the
                  environment we are in. This is based on viewing Occam's razor as an
                  imperative instead of as a proposition about the world. Since this
                  principle cannot be true for all reference machines, we need to
                  find a machine that makes the principle true. We both want good
                  policies and the environment to have short implementations on the
                  machine. Such a machine is learnt iteratively through a procedure
                  that generalizes the principle underlying the
                  Expectation-Maximization algorithm.",
  support =      "ARC grant DP120100950",
  for =          "080101(100%)",
  seo =          "970108(80%),970122(20%)",
  znote =        "Acceptance rate: 22/65 = 34\%.",
}
@InProceedings{Hutter:14optcog,
  author =       "Peter Sunehag and Marcus Hutter",
  title =        "A Dual Process Theory of Optimistic Cognition",
  booktitle =    "Proc. 36th Annual Meeting of the Cognitive Science Society ({CogSci'14})",
  pages =        "2949--2954",
  _editor =       "Paul Bello and Marcello Guarini and Marjorie McShane and Brian Scassellati",
  publisher =    "Curran Associates",
  address =      "Quebec City, Canada",
  _month =        jul,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#optcog",
  http =         "http://mindmodeling.org/cogsci2014/papers/509/paper509.pdf",
  pdf =          "http://www.hutter1.net/publ/optcog.pdf",
  slides =       "http://www.hutter1.net/publ/soptcog.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  isbn =         "978-1-63439-116-0",
  keywords =     "Rationality, Optimism, Optimality, Reinforcement Learning",
  abstract =     "Optimism is a prevalent bias in human cognition including
                  variations like self-serving beliefs, illusions of control and
                  overly positive views of one's own future. Further, optimism
                  has been linked with both success and happiness. In fact, it has
                  been described as a part of human mental well-being which has
                  otherwise been assumed to be about being connected to reality.
                  In reality, only people suffering from depression are realistic.
                  Here we study a formalization of optimism within a dual process
                  framework and study its usefulness beyond human needs
                  in a way that also applies to artificial reinforcement learning
                  agents. Optimism enables systematic exploration which is essential
                  in an (partially) unknown world. The key property of
                  an optimistic hypothesis is that if it is not contradicted when
                  one acts greedily with respect to it, then one is well rewarded
                  even if it is wrong.",
  support =      "ARC grant DP120100950",
  for =          "080101(50%),170202(50%)",
  seo =          "970108(70%),970117(30%)",
}
@InProceedings{Hutter:14frlabs,
  author =       "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title =        "Feature Reinforcement Learning: State of the Art",
  booktitle =    "Proc. Workshops at the 28th {AAAI} Conference on Artificial Intelligence:
                  Sequential Decision Making with Big Data",
  pages =        "2--5",
  _editor =       "Amir-Massoud Farahmand and others",
  publisher =    "AAAI Press",
  address =      "Quebec City, Canada",
  _month =        jul,
  year =         "2014",
  bibtex =       "http://www.hutter1.net/official/bib.htm#frlabs",
  http =         "http://www.aaai.org/ocs/index.php/WS/AAAIW14/paper/view/8791",
  pdf =          "http://www.hutter1.net/publ/frlabs.pdf",
  slides =       "http://www.hutter1.net/publ/sfrlabs.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  keywords =     "Reinforcement learning; temporal difference learning;
                  partial observability; Q-learning; feature learning;
                  function approximation; rational agents.",
  abstract =     "Feature reinforcement learning was introduced five years ago
                  as a principled and practical approach to history-based
                  learning. This paper examines the progress since its inception. We
                  now have both model-based and model-free cost functions,
                  most recently extended to the function approximation setting.
                  Our current work is geared towards playing ATARI games
                  using imitation learning, where we use Feature RL as a feature
                  selection method for high-dimensional domains.",
  support =      "ARC grant DP120100950",
  for =          "080101(100%)",
  seo =          "970108(100%)",
  znote =        "http://sites.google.com/site/decisionmakingbigdata/",
}
@inproceedings{Hutter:14floud,
  author       = {Tom Everitt and Tor Lattimore and Marcus Hutter},
  title        = {Free Lunch for Optimisation under the Universal Distribution},
  booktitle    = {Proc. 2014 Congress on Evolutionary Computation ({CEC'14})},
  pages        = {167--174},
  _editor      = {Derong Liu and Jennie Si},
  publisher    = {IEEE},
  address      = {Beijing, China},
  _month       = jul,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#floud},
  url          = {http://arxiv.org/abs/1608.04544},
  pdf          = {http://www.hutter1.net/publ/floud.pdf},
  slides       = {http://www.hutter1.net/publ/sfloud.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  isbn         = {978-1-4799-6626-4},
  doi          = {10.1109/CEC.2014.6900546},
  keywords     = {function optimization; universal prior; Occam's Razor; No Free Lunch.},
  abstract     = {Function optimisation is a major challenge in computer science. The
                  No Free Lunch theorems state that if all functions with the same
                  histogram are assumed to be equally probable then no algorithm
                  outperforms any other in expectation. We argue against the uniform
                  assumption and suggest a universal prior exists for which there is
                  a free lunch, but where no particular class of functions is
                  favoured over another. We also prove upper and lower bounds on the
                  size of the free lunch.},
  for          = {080199(70%),010404(30%)},
  seo          = {970108(100%)},
}
@article{Hutter:14tcdiscx,
  author       = {Tor Lattimore and Marcus Hutter},
  title        = {General Time Consistent Discounting},
  journal      = {Theoretical Computer Science},
  volume       = 519,
  pages        = {140--154},
  publisher    = {Elsevier},
  _month       = jan,
  year         = 2014,
  bibtex       = {http://www.hutter1.net/official/bib.htm#tcdiscx},
  pdf          = {http://www.hutter1.net/publ/tcdiscx.pdf},
  slides       = {http://www.hutter1.net/publ/stcdisc.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#agents},
  issn         = {0304-3975},
  doi          = {10.1016/j.tcs.2013.09.022},
  keywords     = {Rational agents; sequential decision theory;
                  general discounting; time-consistency; game theory.},
  abstract     = {Modeling inter-temporal choice is a key problem in both computer
                  science and economic theory. The discounted utility model of
                  Samuelson is currently the most popular model for measuring the
                  global utility of a time-series of local utilities. The model is
                  limited by not allowing the discount function to change with the
                  age of the agent. This is despite the fact that many agents, in
                  particular humans, are best modelled with age-dependent discount
                  functions. It is well known that discounting can lead to
                  time-inconsistent behaviour where agents change their preferences
                  over time. In this paper we generalise the discounted utility model
                  to allow age-dependent discount functions. We then extend previous
                  work in time-inconsistency to our new setting, including a complete
                  characterisation of time-(in)consistent discount functions, the
                  existence of sub-game perfect equilibrium policies where the
                  discount function is time-inconsistent and a continuity result
                  showing that ``nearly'' time-consistent discount rates lead to
                  ``nearly'' time-consistent behaviour.},
  for          = {010405(20%),080101(40%),140104(20%),170202(20%)},
  seo          = {970108(40%),970114(30%),970117(30%)},
}

%-------------Publications-of-Marcus-Hutter-2013--------------%

@Article{Hutter:13uai4lay,
  author =       "Marcus Hutter",
  title =        "To Create a Super-Intelligent Machine, Start with an Equation",
  journal =      "The Conversation",
  volume =       "November",
  number =       "29",
  pages =        "1--5",
  _month =        nov,
  year =         "2013",
  bibtex =       "http://www.hutter1.net/official/bib.htm#uai4lay",
  url =          "http://theconversation.com/to-create-a-super-intelligent-machine-start-with-an-equation-20756",
  pdf =          "http://www.hutter1.net/publ/uai4lay.pdf",
  slides =       "http://www.hutter1.net/publ/suai4lay.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  keywords =     "intelligence; mathematics; learning; planning; rational agents; foundations.",
  abstract =     "Intelligence is a very difficult concept and, until recently, no
                  one has succeeded in giving it a satisfactory formal definition.
                  Most researchers have given up grappling with the notion of
                  intelligence in full generality, and instead focus on related but
                  more limited concepts -- but I argue that mathematically defining
                  intelligence is not only possible, but crucial to understanding and
                  developing super-intelligent machines. From this, my research group
                  has even successfully developed software that can learn to play
                  Pac-Man from scratch.",
  for =          "080401(20%),080101(30%),080199(30%),220399(20%)",
  seo =          "970108(80%),970122(20%)",
  znote =        "Top 10 of 700+ ANU articles till 2013. 25'000+ views.
                  http://theconversation.com/metrics/institutions/australian-national-university/article_leaderboard",
}
@InProceedings{Hutter:13rlqh,
  author =       "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title =        "Q-Learning for History-Based Reinforcement Learning",
  booktitle =    "Proc. 5th Asian Conf. on Machine Learning ({ACML'13})",
  volume =       "29",
  pages =        "213--228",
  _editor =       "Tu Bao Ho and Cheng Soon Ong",
  publisher =    "JMLR",
  address =      "Canberra, Australia",
  _month =        nov,
  year =         "2013",
  bibtex =       "http://www.hutter1.net/official/bib.htm#rlqh",
  http =         "http://proceedings.mlr.press/v29/Daswani13.html",
  pdf =          "http://www.hutter1.net/publ/rlqh.pdf",
  slides =       "http://www.hutter1.net/publ/srlqh.pdf",
  poster =       "http://www.hutter1.net/publ/prlqh.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#frl",
  issn =         "1532-4435",
  url =          "http://jmlr.org/proceedings/papers/v29/Daswani13.pdf",
  keywords =     "feature reinforcement learning; temporal difference learning;
                  Markov decision process; partial observability; Q-learning;
                  Monte Carlo search; Pocman; rational agents.",
  abstract =     "We extend the Q-learning algorithm from the Markov Decision Process
                  setting to problems where observations are non-Markov and do not
                  reveal the full state of the world i.e. to POMDPs. We do this in a
                  natural manner by adding l0 regularisation to the pathwise squared
                  Q-learning objective function and then optimise this over both a
                  choice of map from history to states and the resulting MDP
                  parameters. The optimisation procedure involves a stochastic search
                  over the map class nested with classical Q-learning of the
                  parameters. This algorithm fits perfectly into the feature
                  reinforcement learning framework, which chooses maps based on a
                  cost criteria. The cost criterion used so far for feature
                  reinforcement learning has been model-based and aimed at predicting
                  future states and rewards. Instead we directly predict the return,
                  which is what is needed for choosing optimal actions. Our
                  Q-learning criteria also lends itself immediately to a function
                  approximation setting where features are chosen based on the
                  history. This algorithm is somewhat similar to the recent line of
                  work on lasso temporal difference learning which aims at finding a
                  small feature set with which one can perform policy evaluation. The
                  distinction is that we aim directly for learning the Q-function of
                  the optimal policy and we use l0 instead of l1 regularisation. We
                  perform an experimental evaluation on classical benchmark domains
                  and find improvement in convergence speed as well as in economy of
                  the state representation. We also compare against MC-AIXI on the
                  large Pocman domain and achieve competitive performance in average
                  reward. We use less than half the CPU time and 36 times less
                  memory. Overall, our algorithm hQL provides a better combination of
                  computational, memory and data efficiency than existing algorithms in
                  this setting.",
  support =      "ARC grant DP120100950",
  for =          "080101(100%)",
  seo =          "970108(100%)",
  znote =        "long presentation: Acceptance rate: 13/103 = 13\%",
}
@article{Hutter:13problogic,
  author       = {Marcus Hutter and John W. Lloyd and Kee Siong Ng and William T.B. Uther},
  title        = {Probabilities on Sentences in an Expressive Logic},
  journal      = {Journal of Applied Logic},
  volume       = {11},
  pages        = {386--420},
  _publisher   = {Elsevier},
  _month       = nov,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#problogic},
  url          = {http://arxiv.org/abs/1209.2620},
  pdf          = {http://www.hutter1.net/publ/problogic.pdf},
  latex        = {http://www.hutter1.net/publ/problogic.tex},
  slides       = {http://www.hutter1.net/publ/sproblogic.pdf},
  video        = {http://www.youtube.com/watch?v=WEkZSHcRsAM},
  project      = {http://www.hutter1.net/official/projects.htm#logic},
  doi          = {10.1016/j.jal.2013.03.003},
  issn         = {1570-8683},
  keywords     = {higher-order logic; probability on sentences;
                  Gaifman; Cournot; Bayes; induction; confirmation;
                  learning; prior; knowledge; entropy.},
  abstract     = {Automated reasoning about uncertain knowledge has many applications.
                  One difficulty when developing such systems is the lack
                  of a completely satisfactory integration of logic and probability.
                  We address this problem directly.
                     Expressive languages like higher-order logic are ideally suited
                  for representing and reasoning about structured knowledge.
                  Uncertain knowledge can be modeled by using graded probabilities
                  rather than binary truth-values.
                     The main technical problem studied in this paper is the following:
                  Given a set of sentences, each having some probability of being true,
                  what probability should be ascribed to other (query) sentences?
                     A natural wish-list, among others, is that the probability distribution
                  (i) is consistent with the knowledge base,
                  (ii) allows for a consistent inference procedure and in particular
                  (iii) reduces to deductive logic in the limit of probabilities being 0 and 1,
                  (iv) allows (Bayesian) inductive reasoning and
                  (v) learning in the limit and in particular
                  (vi) allows confirmation of universally quantified hypotheses/sentences.
                     We translate this wish-list into technical requirements for a prior probability
                  and show that probabilities satisfying all our criteria exist.
                  We also give explicit constructions and several general
                  characterizations of probabilities that satisfy some or all of
                  the criteria and various (counter) examples.
                     We also derive necessary and sufficient conditions for
                  extending beliefs about finitely many sentences to suitable
                  probabilities over all sentences,
                  and in particular least dogmatic or least biased ones.
                     We conclude with a brief outlook on how the developed theory might
                  be used and approximated in autonomous reasoning agents.
                  Our theory is a step towards a globally consistent and
                  empirically satisfactory unification of probability and logic.},
  support      = {ARC grant DP0877635},
  for          = {080203(50%),010404(30%),080401(10%),080101(10%)},
  seo          = {970108(80%),970101(20%)},
  znote        = {Presented at Progic 2011: http://sites.google.com/site/progicconference2011/
                  and at WL4AI@IJCAI 2013: http://ijcai13.org/program/workshop/32},
}
@inproceedings{Hutter:13ksaprob,
  author       = {Laurent Orseau and Tor Lattimore and Marcus Hutter},
  title        = {Universal Knowledge-Seeking Agents for Stochastic Environments},
  booktitle    = {Proc. 24th International Conf. on Algorithmic Learning Theory ({ALT'13})},
  address      = {Singapore},
  series       = {LNAI},
  volume       = {8139},
  _editor      = {S. Jain and R. Munos and F. Stephan and Th. Zeugmann},
  publisher    = {Springer},
  pages        = {158--172},
  _month       = oct,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ksaprob},
  conf         = {http://www-alg.ist.hokudai.ac.jp/~thomas/ALT13/},
  pdf          = {http://www.hutter1.net/publ/ksaprob.pdf},
  slides       = {http://www.hutter1.net/publ/sksaprob.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.1007/978-3-642-40935-6_12},
  issn         = {0302-9743},
  isbn         = {978-3-642-40934-9},
  keywords     = {Universal artificial intelligence; exploration; reinforcement learning;
                  algorithmic information theory; Solomonoff induction.},
  abstract     = {We define an optimal Bayesian knowledge-seeking agent, KL-KSA,
                  designed for countable hypothesis classes of stochastic
                  environments and whose goal is to gather as much information about
                  the unknown world as possible. Although this agent works for
                  arbitrary countable classes and priors, we focus on the especially
                  interesting case where all stochastic computable environments are
                  considered and the prior is based on Solomonoff's universal prior.
                  Among other properties, we show that KL-KSA learns the true
                  environment in the sense that it learns to predict the consequences
                  of actions it does not take. We show that it does not consider
                  noise to be information and avoids taking actions leading to
                  inescapable traps. We also present a variety of toy experiments
                  demonstrating that KL-KSA behaves according to expectation.},
  for          = {080101(50%),080199(50%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 23/39 = 59\%},
}
@inproceedings{Hutter:13ccbayessp,
  author       = {Tor Lattimore and Marcus Hutter and Peter Sunehag},
  title        = {Concentration and Confidence for Discrete Bayesian Sequence Predictors},
  booktitle    = {Proc. 24th International Conf. on Algorithmic Learning Theory ({ALT'13})},
  address      = {Singapore},
  series       = {LNAI},
  volume       = {8139},
  _editor      = {S. Jain and R. Munos and F. Stephan and Th. Zeugmann},
  publisher    = {Springer},
  pages        = {324--338},
  _month       = oct,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ccbayessp},
  conf         = {http://www-alg.ist.hokudai.ac.jp/~thomas/ALT13/},
  url          = {http://arxiv.org/abs/1307.0127},
  pdf          = {http://www.hutter1.net/publ/ccbayessp.pdf},
  slides       = {http://www.hutter1.net/publ/sccbayessp.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#bayes},
  doi          = {10.1007/978-3-642-40935-6_23},
  issn         = {0302-9743},
  isbn         = {978-3-642-40934-9},
  keywords     = {Bayesian sequence prediction; concentration of measure;
                  information theory; KWIK learning.},
  abstract     = {Bayesian sequence prediction is a simple technique for predicting
                  future symbols sampled from an unknown measure on infinite
                  sequences over a countable alphabet. While strong bounds on the
                  expected cumulative error are known, there are only limited results
                  on the distribution of this error. We prove tight high-probability
                  bounds on the cumulative error, which is measured in terms of the
                  Kullback-Leibler (KL) divergence. We also consider the problem of
                  constructing upper confidence bounds on the KL and Hellinger errors
                  similar to those constructed from Hoeffding-like bounds in the
                  i.i.d. case. The new results are applied to show that Bayesian
                  sequence prediction can be used in the Knows What It Knows (KWIK)
                  framework with bounds that match the state-of-the-art.},
  support      = {ARC grant DP120100950},
  for          = {010405(70%),010404(30%)},
  seo          = {970101(100%)},
  znote        = {Acceptance rate: 23/39 = 59\%},
}
@Proceedings{Hutter:13ewrlabs,
  editor =       "Peter Auer and Marcus Hutter and Laurent Orseau",
  title =        "Reinforcement Learning",
  subtitle =     "Dagstuhl Seminar 13321 ({EWRL'13})",
  publisher =    "Schloss Dagstuhl -- Leibniz-Zentrum fuer Informatik",
  address =      "Dagstuhl, Germany",
  volume =       "3",
  number =       "8",
  _month =        aug,
  year =         "2013",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ewrlabs13",
  url =          "http://drops.dagstuhl.de/opus/volltexte/2013/4340/",
  pdf =          "http://www.hutter1.net/publ/ewrlabs13.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#other",
  issn =         "2192-5283",
  doi =          "10.4230/DagRep.3.8.1",
  keywords =     "Machine Learning, Reinforcement Learning, Markov Decision Processes, Planning",
  abstract =     "This Dagstuhl Seminar also stood as the 11th European Workshop on
                  Reinforcement Learning (EWRL11). Reinforcement learning gains more
                  and more attention each year, as can be seen at the various
                  conferences (ECML, ICML, IJCAI, ...). EWRL, and in particular this
                  Dagstuhl Seminar, aimed at gathering people interested in
                  reinforcement learning from all around the globe. This unusual
                  format for EWRL helped viewing the field and discussing topics
                  differently.",
  for =          "080101(50%),080199(50%)",
  seo =          "970108(100%)",
}
@inproceedings{Hutter:13problogics,
  author       = {Marcus Hutter and John W. Lloyd and Kee Siong Ng and William T.B. Uther},
  title        = {Unifying Probability and Logic for Learning},
  booktitle    = {Proc. 2nd Workshop on Weighted Logics for AI ({WL4AI'13})},
  _volume      = {2},
  pages        = {65--72},
  _editor      = {Lluis Godo and Henri Prade and Guilin Qi},
  publisher    = {},
  address      = {Beijing, China},
  _month       = aug,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#problogics},
  url          = {http://ijcai13.org/program/workshop/32},
  http         = {http://www.iiia.csic.es/wl4ai-2013/working_papers},
  pdf          = {http://www.hutter1.net/publ/problogics.pdf},
  slides       = {http://www.hutter1.net/publ/sproblogic.pdf},
  video        = {http://www.youtube.com/watch?v=WEkZSHcRsAM},
  project      = {http://www.hutter1.net/official/projects.htm#logic},
  conf         = {http://www.iiia.csic.es/wl4ai-2013/},
  keywords     = {higher-order logic; probability on sentences;
                  Gaifman; Cournot; Bayes; induction; confirmation;
                  learning; prior; knowledge; entropy.},
  abstract     = {Uncertain knowledge can be modeled by using graded probabilities
                  rather than binary truth-values, but so far a completely satisfactory
                  integration of logic and probability has been lacking.
                  In particular the inability of confirming universal hypotheses
                  has plagued most if not all systems so far.
                  We address this problem head on.
                     The main technical problem to be discussed is the following:
                  Given a set of sentences, each having some probability of being true,
                  what probability should be ascribed to other (query) sentences?
                     A natural wish-list, among others, is that the probability distribution
                  (i) is consistent with the knowledge base,
                  (ii) allows for a consistent inference procedure and in particular
                  (iii) reduces to deductive logic in the limit of probabilities being 0 and 1,
                  (iv) allows (Bayesian) inductive reasoning and
                  (v) learning in the limit and in particular
                  (vi) allows confirmation of universally quantified hypotheses/sentences.
                     We show that probabilities satisfying (i)-(vi) exist,
                  and present necessary and sufficient conditions (Gaifman and Cournot).
                    The theory is a step towards a globally consistent and
                  empirically satisfactory unification of probability and logic.},
  support      = {ARC grant DP0877635},
  for          = {080203(50%),010404(30%),080401(10%),080101(10%)},
  seo          = {970108(80%),970101(20%)},
  znote        = {Only appears on WS/IJCAI website},
}
@inproceedings{Hutter:13agscilaws,
  author       = {Peter Sunehag and Marcus Hutter},
  title        = {Learning Agents with Evolving Hypothesis Classes},
  booktitle    = {Proc. 6th Conf. on Artificial General Intelligence ({AGI'13})},
  series       = {LNAI},
  volume       = {7999},
  pages        = {150--159},
  _editor      = {Kai-Uwe Kuehnberger and Sebastian Rudolph and Pei Wang},
  publisher    = {Springer, Heidelberg},
  _address     = {Beijing, China},
  _month       = jul,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#agscilaws},
  pdf          = {http://www.hutter1.net/publ/agscilaws.pdf},
  slides       = {http://www.hutter1.net/publ/sagscilaws.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.1007/978-3-642-39521-5_16},
  issn         = {0302-9743},
  isbn         = {978-3-642-39520-8},
  abstract     = {It has recently been shown that a Bayesian agent with a universal
                  hypothesis class resolves most induction problems discussed in the
                  philosophy of science. These ideal agents are, however, neither practical
                  nor a good model for how real science works. We here introduce a framework
                  for learning based on implicit beliefs over all possible hypotheses
                  and limited sets of explicit theories sampled from an implicit distribution
                  represented only by the process by which it generates new hypotheses.
                  We address the questions of how to act based on a limited set of theories
                  as well as what an ideal sampling process should be like. Finally,
                  we discuss topics in philosophy of science and cognitive science from the
                  perspective of this framework.},
  support      = {ARC grant DP120100950},
  for          = {080101(100%)},
  seo          = {970108(80%),970122(20%)},
  znote        = {Acceptance rate: 20/42 = 48\%.},
}
@Article{Hutter:13pacgrl,
  author =       "Tor Lattimore and Marcus Hutter and Peter Sunehag",
  title =        "The Sample-Complexity of General Reinforcement Learning",
  journal =      "Journal of Machine Learning Research, W\&CP: ICML",
  volume =       "28",
  number =       "3",
  pages =        "28--36",
  _editor =       "S. Dasgupta and D. McAllester",
  publisher =    "",
  _address =     "Atlanta, Georgia, USA",
  _month =        jun,
  year =         "2013",
  bibtex =       "http://www.hutter1.net/official/bib.htm#pacgrl",
  http =         "http://jmlr.org/proceedings/papers/v28/lattimore13.html",
  url =          "http://arxiv.org/abs/1308.4828",
  pdf =          "http://www.hutter1.net/publ/pacgrl.pdf",
  latex =        "http://www.hutter1.net/publ/pacgrl.tex",
  slides =       "http://www.hutter1.net/publ/spacgrl.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#agents",
  issn =         "1532-4435",
  keywords =     "reinforcement learning; sample complexity; PAC bounds",
  abstract =     "We present a new algorithm for general reinforcement learning where
                  the true environment is known to belong to a finite class of N
                  arbitrary models. The algorithm is shown to be near-optimal for all
                  but O(N log^2 N) timesteps with high probability. Infinite classes
                  are also considered where we show that compactness is a key
                  criterion for determining the existence of uniform
                  sample-complexity bounds. A matching lower bound is given for the
                  finite case.",
  support =      "ARC grant DP120100950",
  for =          "010405(50%),080199(50%). See CD4/Projects/Grant-Info.txt for more",
  seo =          "970108(100%)",
}
@article{Hutter:13sad,
  author       = {Marcus Hutter},
  title        = {Sparse Adaptive {D}irichlet-Multinomial-like Processes},
  journal      = {Journal of Machine Learning Research, W\&CP: COLT},
  volume       = {30},
  pages        = {432--459},
  _month       = jun,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#sad},
  url          = {http://arxiv.org/abs/1305.3671},
  pdf          = {http://www.hutter1.net/publ/sad.pdf},
  latex        = {http://www.hutter1.net/publ/sad.tex},
  slides       = {http://www.hutter1.net/publ/ssad.pdf},
  audio        = {http://vmc.aarnet.edu.au/userdata/0b/0b4d5c6f-e775-4d48-8b47-32dc95d19b8b/ingest1685426376076922317.asf},
  project      = {http://www.hutter1.net/official/projects.htm#infoth},
  issn         = {1532-4435},
  keywords     = {sparse coding; adaptive parameters; Dirichlet-Multinomial;
                  Polya urn; data-dependent redundancy bound;
                  small/large alphabet; data compression.},
  abstract     = {Online estimation and modelling of i.i.d. data for short
                  sequences over large or complex ``alphabets'' is a ubiquitous
                  (sub)problem in machine learning, information theory, data
                  compression, statistical language processing, and document
                  analysis. The Dirichlet-Multinomial distribution (also called
                  Polya urn scheme) and extensions thereof are widely applied for
                  online i.i.d. estimation. Good a-priori choices for the
                  parameters in this regime are difficult to obtain though. I
                  derive an optimal adaptive choice for the main parameter via
                  tight, data-dependent redundancy bounds for a related model. The
                  1-line recommendation is to set the 'total mass' = 'precision' =
                  'concentration' parameter to m/2ln[(n+1)/m], where n
                  is the (past) sample size and m the number of different symbols
                  observed (so far). The resulting estimator is simple, online,
                  fast, and experimental performance is superb.},
  for          = {080401(70%),010405(30%)},
  seo          = {970108(100%)},
  znote        = {26th Annual Conf. on Learning Theory. Acceptance rate: 47/98 = 48\%},
}
@inproceedings{Hutter:13mnonconv,
  author       = {Tor Lattimore and Marcus Hutter},
  title        = {On {M}artin-L{\"o}f Convergence of {S}olomonoff's Mixture},
  booktitle    = {Proc. 10th Annual Conference on Theory and
                  Applications of Models of Computation ({TAMC'13})},
  volume       = {7876},
  pages        = {212--223},
  series       = {LNCS},
  _editor      = {T-H.H. Chan and L.C. Lau and L. Trevisan},
  publisher    = {Springer},
  address      = {Hong Kong, China},
  _month       = may,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#mnonconv},
  pdf          = {http://www.hutter1.net/publ/mnonconv.pdf},
  slides       = {http://www.hutter1.net/publ/smnonconv.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  issn         = {0302-9743},
  isbn         = {978-3-642-38235-2},
  doi          = {10.1007/978-3-642-38236-9_20},
  keywords     = {Solomonoff induction, Kolmogorov complexity, theory of computation.},
  abstract     = {We study the convergence of Solomonoff's universal mixture
                  on individual Martin-L{\"o}f random sequences. A new result is presented
                  extending the work of Hutter and Muchnik (2004) by showing that there
                  does not exist a universal mixture that converges on all Martin-L{\"o}f
                  random sequences.},
  for          = {080401(50%),010404(30%),010405(20%)},
  seo          = {970101(30%),970108(70%)},
  znote        = {Acceptance rate: 31/70 = 44\%},
}
@article{Hutter:13alttcs,
  author       = {Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann},
  title        = {{ALT'10} Special Issue},
  journal      = {Theoretical Computer Science},
  editor       = {Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann},
  volume       = {473},
  publisher    = {Elsevier},
  pages        = {1--3/178},
  _month       = feb,
  year         = {2013},
  bibtex       = {http://www.hutter1.net/official/bib.htm#alt10tcs},
  http         = {http://www.sciencedirect.com/science/journal/03043975/473},
  doi          = {10.1016/j.tcs.2012.10.007},
  issn         = {0304-3975},
  keywords     = {algorithmic learning theory, special issue, preface},
  abstract     = {This special issue contains expanded versions of papers that appeared in
                  preliminary form in the proceedings of the 21st International Conference
                  on Algorithmic Learning Theory (ALT 2010), which was held in Canberra,
                  Australia during October 6--8, 2010.  \emph{Algorithmic Learning Theory} is
                  a conference series which is dedicated to the theoretical study of the
                  algorithmic aspects of learning.  The best papers of the conference ALT 2010
                  were invited for this special issue and after a thorough reviewing process,
                  most of them qualified for this Special Issue on Algorithmic Learning Theory
                  of Theoretical Computer Science. The preface contains a short introduction
                  to each of these papers.},
  for          = {080401(20%),010405(20%),080199(60%)},
  seo          = {970108(100%)},
}

%-------------Publications-of-Marcus-Hutter-2012--------------%

@article{Hutter:12lstphi,
  author       = {Mayank Daswani and Peter Sunehag and Marcus Hutter},
  title        = {Feature Reinforcement Learning using Looping Suffix Trees},
  journal      = {Journal of Machine Learning Research, W\&CP},
  volume       = {24},
  pages        = {11--23},
  _month       = dec,
  year         = {2012},
  bibtex       = {http://www.hutter1.net/official/bib.htm#lstphi},
  http         = {http://proceedings.mlr.press/v24/daswani12a.html},
  pdf          = {http://www.hutter1.net/publ/lstphi.pdf},
  latex        = {http://www.hutter1.net/publ/lstphi.tex},
  slides       = {http://www.hutter1.net/publ/slstphi.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#rl},
  issn         = {1532-4435},
  keywords     = {looping suffix trees; Markov decision process;
                  reinforcement learning; partial observability;
                  information \& complexity; Monte Carlo search; rational agents.},
  abstract     = {There has recently been much interest in history-based methods
                  using suffix trees to solve POMDPs. However, these suffix trees
                  cannot efficiently represent environments that have long-term
                  dependencies. We extend the recently introduced CT$\Phi$MDP
                  algorithm to the space of looping suffix trees which have
                  previously only been used in solving deterministic POMDPs. The
                  resulting algorithm replicates results from CT$\Phi$MDP for
                  environments with short term dependencies, while it outperforms
                  LSTM-based methods on TMaze, a deep memory environment.},
  support      = {ARC grant DP120100950},
  for          = {080401(20%),010405(20%),080101(60%)},
  seo          = {970108(100%)},
}
@inproceedings{Hutter:12aixiens,
  author       = {Joel Veness and Peter Sunehag and Marcus Hutter},
  title        = {On Ensemble Techniques for {AIXI} Approximation},
  booktitle    = {Proc. 5th Conf. on Artificial General Intelligence ({AGI'12})},
  series       = {LNAI},
  volume       = {7716},
  pages        = {341--351},
  _editor      = {J. Bach and B. Goertzel and M. Ikle},
  publisher    = {Springer, Heidelberg},
  _address     = {Oxford, UK},
  _month       = dec,
  year         = {2012},
  bibtex       = {http://www.hutter1.net/official/bib.htm#aixiens},
  pdf          = {http://www.hutter1.net/publ/aixiens.pdf},
  slides       = {http://www.hutter1.net/publ/saixiens.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.1007/978-3-642-35506-6_35},
  issn         = {0302-9743},
  isbn         = {978-3-642-35505-9},
  keywords     = {Ensemble Techniques; AIXI; Universal Artificial Intelligence;
                  Agent Architectures; Perception and Perceptual Modeling.},
  abstract     = {One of the key challenges in AIXI approximation is model class
                  approximation - i.e. how to meaningfully approximate Solomonoff
                  Induction without requiring an infeasible amount of computation?
                  This paper advocates a bottom-up approach to this problem, by
                  describing a number of principled ensemble techniques for
                  approximate AIXI agents. Each technique works by efficiently
                  combining a set of existing environment models into a single, more
                  powerful model. These techniques have the potential to play an
                  important role in future AIXI approximations.},
  support      = {ARC grant DP120100950},
  for          = {080401(20%),010404(30%),080101(30%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 34/80 = 42\%.},
}
% AGI'12 conference paper (LNAI 7716) on extending AIXI with multiple priors and optimism.
@inproceedings{Hutter:12aixiopt,
  author       = {Peter Sunehag and Marcus Hutter},
  title        = {Optimistic {AIXI}},
  booktitle    = {Proc. 5th Conf. on Artificial General Intelligence ({AGI'12})},
  series       = {LNAI},
  volume       = {7716},
  pages        = {312--321},
  _editor      = {J. Bach and B. Goertzel and M. Ikle},
  publisher    = {Springer, Heidelberg},
  _address     = {Oxford, UK},
  _month       = dec,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#aixiopt},
  pdf          = {http://www.hutter1.net/publ/aixiopt.pdf},
  slides       = {http://www.hutter1.net/publ/saixiopt.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.1007/978-3-642-35506-6_32},
  issn         = {0302-9743},
  isbn         = {978-3-642-35505-9},
  keywords     = {AIXI; Reinforcement Learning; Optimism; Optimality; Agents; Prior Sets; Bets.},
  abstract     = {We consider extending the AIXI agent by using multiple (or even a
                  compact class of) priors. This has the benefit of weakening the
                  conditions on the true environment that we need to prove asymptotic
                  optimality. Furthermore, it decreases the arbitrariness of picking
                  the prior or reference machine. We connect this to removing symmetry
                  between accepting and rejecting bets in the rationality
                  axiomatization of AIXI and replacing it with optimism. Optimism is
                  often used to encourage exploration in the more restrictive Markov
                  Decision Process setting and it alleviates the problem that AIXI
                  (with geometric discounting) stops exploring prematurely.},
  support      = {ARC grant DP120100950},
  for          = {080101(70%),220302(30%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 34/80 = 42\%.},
}
% AusDM'12 conference paper on discounting/windowing schemes for coding non-stationary
% binary sources, as a basis for change point and outlier detection.
@InProceedings{Hutter:12windowkt,
  author =       "Peter Sunehag and Wen Shao and Marcus Hutter",
  title =        "Coding of Non-Stationary Sources as a Foundation for Detecting Change Points and Outliers in Binary Time-Series",
  booktitle =    "Proc. 10th Australasian Data Mining Conference ({AusDM'12})",
  volume =       "134",
  pages =        "79--84",
  _editor =       "Yanchang Zhao and Jiuyong Li and Paul Kennedy and Peter Christen",
  publisher =    "Australian Computer Society",
  address =      "Sydney, Australia",
  _month =        dec,
  year =         "2012",
  bibtex =       "http://www.hutter1.net/official/bib.htm#windowkt",
  http =         "http://crpit.com/abstracts/CRPITV134Sunehag.html",
  pdf =          "http://www.hutter1.net/publ/windowkt.pdf",
  latex =        "http://www.hutter1.net/publ/windowkt.tex",
  slides =       "http://www.hutter1.net/publ/swindowkt.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#compress",
  issn =         "1445-1336",
  isbn =         "978-1-921770-14-2",
  keywords =     "non-stationary sources; time-series; change point detection; outlier; compression",
  abstract =     "An interesting scheme for estimating and adapting distributions in
                  real-time for non-stationary data has recently been the focus of
                  study for several different tasks relating to time series and data
                  mining, namely change point detection, outlier detection and online
                  compression/sequence prediction. An appealing feature is that
                  unlike more sophisticated procedures, it is as fast as the related
                  stationary procedures which are simply modified through discounting
                  or windowing. The discount scheme makes older observations lose
                  their influence on new predictions. The authors of this article
                  recently used a discount scheme for introducing an adaptive version
                  of the Context Tree Weighting compression algorithm. The mentioned
                  change point and outlier detection methods rely on the changing
                  compression ratio of an online compression algorithm. Here we are
                  beginning to provide theoretical foundations for the use of these
                  adaptive estimation procedures that have already shown practical
                  promise.",
  support =      "ARC grant DP120100950",
  for =          "080401(100%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 25/55 = 45\%",
}
% AusAI'12 conference paper (LNAI 7691) on optimistic, asymptotically optimal RL agents.
@inproceedings{Hutter:12optopt,
  author       = {Peter Sunehag and Marcus Hutter},
  title        = {Optimistic Agents are Asymptotically Optimal},
  booktitle    = {Proc. 25th Australasian Joint Conference on Artificial Intelligence ({AusAI'12})},
  series       = {LNAI},
  volume       = {7691},
  pages        = {15--26},
  _editor      = {Michael Thielscher and Dongmo Zhang},
  publisher    = {Springer},
  address      = {Sydney, Australia},
  _month       = dec,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#optopt},
  url          = {http://arxiv.org/abs/1210.0077},
  pdf          = {http://www.hutter1.net/publ/optopt.pdf},
  latex        = {http://www.hutter1.net/publ/optopt.tex},
  slides       = {http://www.hutter1.net/publ/soptopt.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  issn         = {0302-9743},
  isbn         = {978-3-642-35100-6},
  doi          = {10.1007/978-3-642-35101-3_2},
  keywords     = {Reinforcement Learning; Optimism; Optimality; Agents; Uncertainty.},
  abstract     = {We use optimism to introduce generic asymptotically optimal
                  reinforcement learning agents. They achieve, with an arbitrary
                  finite or compact class of environments, asymptotically optimal
                  behavior. Furthermore, in the finite deterministic case we provide
                  finite error bounds.},
  support      = {ARC grant DP120100950},
  for          = {080101(70%),220302(30%)},
  seo          = {970108(100%)},
  znote        = {Acceptance rate: 76/196 = 39\%},
}
% Book chapter in "A Computable Universe" (World Scientific, 2012) on computable
% theories of everything and the role of the subjective observer.
@incollection{Hutter:12ctoe2,
  author       = {Marcus Hutter},
  title        = {The Subjective Computable Universe},
  booktitle    = {A Computable Universe: Understanding and Exploring Nature as Computation},
  pages        = {399--416},
  _editor      = {Hector Zenil},
  publisher    = {World Scientific},
  _month       = dec,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#ctoe2},
  pdf          = {http://www.hutter1.net/publ/ctoe2.pdf},
  latex        = {http://www.hutter1.net/publ/ctoe2.zip},
  slides       = {http://www.hutter1.net/publ/sctoe.pdf},
  video        = {http://pirsa.org/displayFlash.php?id=18040117},
  http         = {http://www.worldscientific.com/worldscibooks/10.1142/8306},
  project      = {http://www.hutter1.net/official/projects.htm#physics},
  doi          = {10.1142/9789814374309_0021},
  isbn         = {978-9-814-37429-3},
  keywords     = {world models; observer localization; computability;
                  predictive power; Ockham's razor; universal theories;
                  inductive reasoning; simplicity and complexity.},
  abstract     = {Nearly all theories developed for our world are computational.
                  The fundamental theories in physics can be used to emulate on a
                  computer ever more aspects of our universe. This and the
                  ubiquity of computers and virtual realities has increased the
                  acceptance of the computational paradigm. A computable theory
                  of everything seems to have come within reach. Given the
                  historic progression of theories from ego- to geo- to
                  helio-centric models to universe and multiverse theories, the
                  next natural step was to postulate a multiverse composed of all
                  computable universes. Unfortunately, rather than being a theory
                  of everything, the result is more a theory of nothing, which
                  actually plagues all too-large universe models in which
                  observers occupy random or remote locations. The problem can be
                  solved by incorporating the subjective observer process into
                  the theory. While the computational paradigm exposes a
                  fundamental problem of large-universe theories, it also
                  provides its solution.},
  for          = {080401(70%),020103(30%)},
  seo          = {970122(100%)},
}
% ACCV'12 conference paper (LNCS 7724) on a noise-tolerant "viscous force" watershed
% transform for seeded image segmentation.
@inproceedings{Hutter:12watershed,
  author       = {Di Yang and Stephen Gould and Marcus Hutter},
  title        = {A Noise Tolerant Watershed Transformation with Viscous Force for Seeded Image Segmentation},
  booktitle    = {Proc. 11th Asian Conf. on Computer Vision ({ACCV'12})},
  address      = {Daejeon, Korea},
  series       = {LNCS},
  volume       = {7724},
  pages        = {775--789},
  _editor      = {K. M. Lee and Y. Matsushita and J. M. Rehg and Z. Hu},
  publisher    = {Springer},
  _month       = nov,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#watershed},
  pdf          = {http://www.hutter1.net/publ/watershed.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#icar},
  issn         = {0302-9743},
  isbn         = {978-3-642-37330-5},
  doi          = {10.1007/978-3-642-37331-2_58},
  keywords     = {seeded image segmentation; viscous force; local average path; noise-tolerant.},
  abstract     = {The watershed transform was proposed as a novel method for image
                  segmentation over 30 years ago. Today it is still used as an
                  elementary step in many powerful segmentation procedures. The
                  watershed transform constitutes one of the main concepts of
                  mathematical morphology as an important region-based image
                  segmentation approach. However, the original watershed transform is
                  highly sensitive to noise and is incapable of detecting objects
                  with broken edges. Consequently its adoption in domains where
                  imaging is subject to high noise is limited. By incorporating a
                  high-order energy term into the original watershed transform, we
                  proposed the viscous force watershed transform, which is more
                  immune to noise and able to detect objects with broken edges.},
  support      = {ControlExpert GmbH},
  for          = {080106(100%)},
  seo          = {970108(80%),890205(20%)},
  znote        = {Acceptance rate: 226/869 = 26\%},
}
% ALT'12 conference paper (LNAI 7568) on upper and lower sample-complexity (PAC) bounds
% for finite-state discounted MDPs. The ISBN is given in its 13-digit form.
@InProceedings{Hutter:12pacmdp,
  author =       "Tor Lattimore and Marcus Hutter",
  title =        "{PAC} bounds for discounted {MDP}s",
  booktitle =    "Proc. 23rd International Conf. on Algorithmic Learning Theory ({ALT'12})",
  address =      "Lyon, France",
  series =       "LNAI",
  volume =       "7568",
  _editor =       "N.H. Bshouty and G. Stoltz and N. Vayatis and T. Zeugmann",
  publisher =    "Springer",
  pages =        "320--334",
  _month =        oct,
  year =         "2012",
  bibtex =       "http://www.hutter1.net/official/bib.htm#pacmdp",
  conf =         "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT12/",
  url =          "http://arxiv.org/abs/1202.3890",
  pdf =          "http://www.hutter1.net/publ/pacmdp.pdf",
  slides =       "http://www.hutter1.net/publ/spacmdp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#rl",
  doi =          "10.1007/978-3-642-34106-9_26",
  issn =         "0302-9743",
  isbn =         "978-3-642-34105-2",
  keywords =     "Reinforcement learning; sample-complexity;
                  exploration exploitation; PAC-MDP;
                  Markov decision processes.",
  abstract =     "We study upper and lower bounds on the sample-complexity of
                  learning near-optimal behaviour in finite-state discounted
                  Markov Decision Processes (MDPs). We prove a new bound for a
                  modified version of Upper Confidence Reinforcement Learning
                  (UCRL) with only cubic dependence on the horizon. The bound is
                  unimprovable in all parameters except the size of the
                  state/action space, where it depends linearly on the number of
                  non-zero transition probabilities. The lower bound strengthens
                  previous work by being both more general (it applies to all
                  policies) and tighter. The upper and lower bounds match up to
                  logarithmic factors provided the transition matrix is not too
                  dense.",
  support =      "ARC grant DP0988049",
  for =          "010404(30%),010405(30%),080198(40%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 23/47 = 49\%",
}
% Book chapter in "Theoretical Foundations of Artificial General Intelligence"
% (Atlantis Press, 2012): an informal overview of Universal AI and AIXI.
% NOTE(review): seo uses 970122 as in the sibling entries; 87xxxx is the Construction
% division and looked like a typo -- confirm against the ANZSRC SEO code list.
@InCollection{Hutter:12uaigentle,
  author =       "Marcus Hutter",
  title =        "One Decade of Universal Artificial Intelligence",
  booktitle =    "Theoretical Foundations of Artificial General Intelligence",
  pages =        "67--88",
  _editor =       "Pei Wang and Ben Goertzel",
  publisher =    "Atlantis Press",
  _month =        sep,
  year =         "2012",
  bibtex =       "http://www.hutter1.net/official/bib.htm#uaigentle",
  url =          "http://arxiv.org/abs/1202.6153",
  pdf =          "http://www.hutter1.net/publ/uaigentle.pdf",
  latex =        "http://www.hutter1.net/publ/uaigentle.zip",
  slides =       "http://www.hutter1.net/publ/suaigentle.pdf",
  slides2 =      "http://www.hutter1.net/publ/suai4lay.pdf",
  video =        "http://vimeo.com/7321732",
  video2 =       "http://www.youtube.com/watch?v=I-vx5zbOOXI",
  http =         "http://2012.singularitysummit.com.au/2012/08/universal-artificial-intelligence/",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  interview =    "http://www.youtube.com/watch?v=a2tgUXm_txw",
  doi =          "10.2991/978-94-91216-62-6_5",
  isbn =         "978-94-91216-61-9(print) 978-94-91216-62-6(online)",
  keywords =     "artificial intelligence; reinforcement learning;
                  algorithmic information theory; sequential decision theory;
                  universal induction; rational agents; foundations.",
  abstract =     "The first decade of this century has seen the nascency of the
                  first mathematical theory of general artificial intelligence.
                  This theory of Universal Artificial Intelligence (UAI) has made
                  significant contributions to many theoretical, philosophical,
                  and practical AI questions. In a series of papers culminating
                  in book (Hutter, 2005), an exciting sound and complete
                  mathematical model for a super intelligent agent (AIXI) has
                  been developed and rigorously analyzed. While nowadays most AI
                  researchers avoid discussing intelligence, the award-winning
                  PhD thesis (Legg, 2008) provided the philosophical embedding
                  and investigated the UAI-based universal measure of rational
                  intelligence, which is formal, objective and
                  non-anthropocentric. Recently, effective approximations of AIXI
                  have been derived and experimentally investigated in JAIR paper
                  (Veness et al. 2011). This practical breakthrough has resulted
                  in some impressive applications, finally muting earlier
                  critique that UAI is only a theory. For the first time, without
                  providing any domain knowledge, the same agent is able to
                  self-adapt to a diverse range of interactive environments. For
                  instance, AIXI is able to learn from scratch to play TicTacToe,
                  Pacman, Kuhn Poker, and other games by trial and error, without
                  even providing the rules of the games.
                    These achievements give new hope that the grand goal of
                  Artificial General Intelligence is not elusive.
                    This article provides an informal overview of UAI in context.
                  It attempts to gently introduce a very theoretical, formal, and
                  mathematical subject, and discusses philosophical and technical
                  ingredients, traits of intelligence, some social questions, and
                  the past and future of UAI.",
  support =      "ARC grant DP0988049",
  for =          "080401(20%),080101(30%),080199(30%),220399(20%)",
  seo =          "970108(80%),970122(20%)",
}
% AAAI'12 conference paper introducing CTMRL, which uses context-tree maximizing
% within the PhiMDP framework for reinforcement learning.
@InProceedings{Hutter:12ctmrl,
  author =       "Phuong Nguyen and Peter Sunehag and Marcus Hutter",
  title =        "Context Tree Maximizing Reinforcement Learning",
  booktitle =    "Proc. 26th {AAAI} Conference on Artificial Intelligence ({AAAI'12})",
  pages =        "1075--1082",
  _editor =       "J{\"o}rg Hoffmann and Bart Selman",
  publisher =    "AAAI Press",
  address =      "Toronto, Canada",
  _month =        jul,
  year =         "2012",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ctmrl",
  http =         "http://www.aaai.org/ocs/index.php/AAAI/AAAI12/paper/view/5079",
  pdf =          "http://www.hutter1.net/publ/ctmrl.pdf",
  latex =        "http://www.hutter1.net/publ/ctmrl.zip",
  poster =       "http://www.hutter1.net/publ/sctmrl.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#rl",
  code =         "http://www.hutter1.net/publ/ctmrlcode.zip",
  isbn =         "978-1-57735-568-7",
  keywords =     "Context Tree Maximization; Markov Decision Process;
                  Feature Reinforcement Learning.",
  abstract =     "Recent developments in reinforcement learning for
                  non-Markovian problems witness a surge in history-based methods,
                  among which we are particularly interested in two frameworks,
                  PhiMDP and MC-AIXI-CTW. PhiMDP attempts to reduce
                  the general RL problem, where the environment's states
                  and dynamics are both unknown, to an MDP, while
                  MC-AIXI-CTW incrementally learns a mixture of context trees
                  as its environment model. The main idea of PhiMDP is to connect
                  generic reinforcement learning with classical reinforcement
                  learning. The first implementation of PhiMDP relies on a
                  stochastic search procedure for finding a tree that minimizes a
                  certain cost function. This does not guarantee finding the minimizing
                  tree, or even a good one, given limited search time.
                  As a consequence it appears that the approach has difficulties
                  with large domains. MC-AIXI-CTW is attractive in that it can
                  incrementally and analytically compute the internal model
                  through interactions with the environment. Unfortunately, it
                  is computationally demanding due to requiring heavy planning
                  simulations at every single time step. We devise a novel
                  approach called CTMRL, which analytically and efficiently
                  finds the cost-minimizing tree. Instead of the context-tree
                  weighting method that MC-AIXI-CTW is based on, we use
                  the closely related context-tree maximizing algorithm that selects
                  just one single tree. This approach falls under the PhiMDP
                  framework, which allows the replacement of the costly planning
                  component of MC-AIXI-CTW with simple Q-Learning.
                  Our empirical investigation shows that CTMRL finds policies
                  of quality as good as MC-AIXI-CTW's on six domains
                  including a challenging Pacman domain, but in an order of
                  magnitude less time.",
  support =      "ARC grant DP120100950",
  for =          "080401(20%),010405(20%),080101(60%)",
  seo =          "970108(100%)",
  znote =        "Acceptance rate: 294/1129 = 26\%",
}
% Technical report (arXiv:1206.3618) describing Sparse Sequential Dirichlet Coding
% for multi-alphabet memoryless sources.
@techreport{Hutter:12ssdc,
  author       = {Joel Veness and Marcus Hutter},
  title        = {Sparse Sequential {D}irichlet Coding},
  institution  = {UoA and ANU},
  number       = {arXiv:1206.3618},
  _month       = jun,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#ssdc},
  url          = {http://arxiv.org/abs/1206.3618},
  pdf          = {http://www.hutter1.net/publ/ssdc.pdf},
  latex        = {http://www.hutter1.net/publ/ssdc.tex},
  slides       = {http://www.hutter1.net/publ/ssad.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#compress},
  keywords     = {Dirichlet prior; KT estimator; sparse coding;
                  small/large alphabet; data compression.},
  abstract     = {This short paper describes a simple coding technique, Sparse
                  Sequential Dirichlet Coding, for multi-alphabet memoryless
                  sources. It is appropriate in situations where only a small,
                  unknown subset of the possible alphabet symbols can be expected
                  to occur in any particular data sequence. We provide a
                  competitive analysis which shows that the performance of Sparse
                  Sequential Dirichlet Coding will be close to that of a
                  Sequential Dirichlet Coder that knows in advance the exact
                  subset of occurring alphabet symbols. Empirically we show that
                  our technique can perform similarly to the more computationally
                  demanding Sequential Sub-Alphabet Estimator, while using less
                  computational resources.},
  for          = {080401(100%)},
  seo          = {970108(80%),890205(20%)},
}
% DCC'12 conference paper on Context Tree Switching, a modification of
% Context Tree Weighting for predicting binary, stationary, n-Markov sources.
@inproceedings{Hutter:12ctswitch,
  author       = {Joel Veness and Kee Siong Ng and Marcus Hutter and Michael Bowling},
  title        = {Context Tree Switching},
  booktitle    = {Proc. Data Compression Conference ({DCC'12})},
  pages        = {327--336},
  _editor      = {J. A. Storer and M. W. Marcellin},
  publisher    = {IEEE Computer Society},
  address      = {Snowbird, Utah, USA},
  _month       = apr,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#ctswitch},
  url          = {http://arxiv.org/abs/1111.3182},
  pdf          = {http://www.hutter1.net/publ/ctswitch.pdf},
  latex        = {http://www.hutter1.net/publ/ctswitch.tex},
  slides       = {http://www.hutter1.net/publ/sctswitch.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#compress},
  code         = {http://jveness.info/software/cts-v1.zip},
  doi          = {10.1109/DCC.2012.39},
  issn         = {1068-0314},
  isbn         = {978-1-4673-0715-4},
  keywords     = {switching data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.},
  abstract     = {This paper describes the Context Tree Switching technique, a
                  modification of Context Tree Weighting for the prediction of
                  binary, stationary, n-Markov sources. By modifying Context
                  Tree Weighting's recursive weighting scheme, it is possible to
                  mix over a strictly larger class of models without increasing
                  the asymptotic time or space complexity of the original
                  algorithm. We prove that this generalization preserves the
                  desirable theoretical properties of Context Tree Weighting on
                  stationary n-Markov sources, and show empirically that this new
                  technique leads to consistent improvements over Context Tree
                  Weighting as measured on the Calgary Corpus.},
  support      = {ARC grant DP0988049},
  for          = {080401(100%)},
  seo          = {970108(80%),890205(20%)},
}
% DCC'12 conference paper on Adaptive Context Tree Weighting (ACTW), an extension
% of CTW that gives increasing weight to more recent observations.
@inproceedings{Hutter:12adapctw,
  author       = {Alexander O'Neill and Marcus Hutter and Wen Shao and Peter Sunehag},
  title        = {Adaptive Context Tree Weighting},
  booktitle    = {Proc. Data Compression Conference ({DCC'12})},
  pages        = {317--326},
  _editor      = {J. A. Storer and M. W. Marcellin},
  publisher    = {IEEE Computer Society},
  address      = {Snowbird, Utah, USA},
  _month       = apr,
  year         = 2012,
  bibtex       = {http://www.hutter1.net/official/bib.htm#adapctw},
  url          = {http://arxiv.org/abs/1201.2056},
  pdf          = {http://www.hutter1.net/publ/adapctw.pdf},
  latex        = {http://www.hutter1.net/publ/adapctw.tex},
  slides       = {http://www.hutter1.net/publ/sadapctw.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#compress},
  code         = {http://www.hutter1.net/publ/actwcode.zip},
  doi          = {10.1109/DCC.2012.38},
  issn         = {1068-0314},
  isbn         = {978-0-7695-4656-8},
  keywords     = {adaptive data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.},
  abstract     = {We describe an adaptive context tree weighting (ACTW)
                  algorithm, as an extension to the standard context tree
                  weighting (CTW) algorithm. Unlike the standard CTW algorithm,
                  which weights all observations equally regardless of the depth,
                  ACTW gives increasing weight to more recent observations,
                  aiming to improve performance in cases where the input sequence
                  is from a non-stationary distribution. Data compression results
                  show ACTW variants improving over CTW on merged files from
                  standard compression benchmark tests while never being
                  significantly worse on any individual file.},
  support      = {ARC grant DP0988049},
  for          = {080401(100%)},
  seo          = {970108(80%),890205(20%)},
}
% Journal of Consciousness Studies article (vol. 19, 2012) on the technological
% singularity and what it could mean for intelligence to explode.
% The two web links use distinct field names (http, http2) because BibTeX
% ignores a repeated field name within one entry.
@Article{Hutter:12singularity,
  author =       "Marcus Hutter",
  title =        "Can Intelligence Explode?",
  journal =      "Journal of Consciousness Studies",
  volume =       "19",
  number =       "1-2",
  pages =        "143--166",
  publisher =    "Imprint Academic",
  _month =        feb,
  year =         "2012",
  bibtex =       "http://www.hutter1.net/official/bib.htm#singularity",
  http =         "http://www.ingentaconnect.com/content/imp/jcs/2012/00000019/F0020001/art00010",
  url =          "http://arxiv.org/abs/1202.6177",
  pdf =          "http://www.hutter1.net/publ/singularity.pdf",
  latex =        "http://www.hutter1.net/publ/singularity.tex",
  slidespdf =    "http://www.hutter1.net/publ/ssingularity.pdf",
  slidesppt =    "http://www.hutter1.net/publ/ssingularity.pptx",
  slidesaudio =  "http://www.hutter1.net/publ/sasingularity.pptx",
  slidesvideo =  "http://www.hutter1.net/publ/svsingularity.avi",
  livevideo =    "http://www.youtube.com/watch?v=focaMjQbmkI",
  http2 =        "http://2012.singularitysummit.com.au/2012/08/can-intelligence-explode/",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  interview =    "http://www.youtube.com/watch?v=omG990F_ETY",
  issn =         "1355-8250",
  keywords =     "singularity; acceleration; intelligence; evolution;
                  rationality; goal; life; value; virtual; computation; AIXI.",
  abstract =     "The technological singularity refers to a hypothetical scenario
                  in which technological advances virtually explode. The most
                  popular scenario is the creation of super-intelligent
                  algorithms that recursively create ever higher intelligences.
                  It took many decades for these ideas to spread from science
                  fiction to popular science magazines and finally to attract the
                  attention of serious philosophers. David Chalmers' (JCS 2010)
                  article is the first comprehensive philosophical analysis of
                  the singularity in a respected philosophy journal. The
                  motivation of my article is to augment Chalmers' and to discuss
                  some issues not addressed by him, in particular what it could
                  mean for intelligence to explode. In this course, I will (have
                  to) provide a more careful treatment of what intelligence
                  actually is, separate speed from intelligence explosion,
                  compare what super-intelligent participants and classical human
                  observers might experience and do, discuss immediate
                  implications for the diversity and value of life, consider
                  possible bounds on intelligence, and contemplate intelligences
                  right at the singularity.",
  for =          "080101(40%),140104(10%),220312(50%)",
  seo =          "970122(100%)",
}

%-------------Publications-of-Marcus-Hutter-2011--------------%

% DICTA'11 conference paper on "Model Assisted Segmentation": registering a 3D model
% over an image and using its projected contours to initialise a level-set segmentation.
@inproceedings{Hutter:11segm3d,
  author       = {Srimal Jayawardena and Di Yang and Marcus Hutter},
  title        = {3D Model Assisted Image Segmentation},
  booktitle    = {Proc. 13th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'11})},
  pages        = {51--58},
  _editor      = {Andrew Bradley and Paul Jackway},
  publisher    = {IEEE Xplore},
  address      = {Noosa, Australia},
  _month       = dec,
  year         = 2011,
  bibtex       = {http://www.hutter1.net/official/bib.htm#segm3d},
  url          = {http://arxiv.org/abs/1202.1943},
  pdf          = {http://www.hutter1.net/publ/segm3d.pdf},
  latex        = {http://www.hutter1.net/publ/segm3d.zip},
  slides       = {http://www.hutter1.net/publ/ssegm3d.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#icar},
  doi          = {10.1109/DICTA.2011.17},
  isbn         = {978-1-4577-2006-2 or 978-0-7695-4588-2},
  keywords     = {Image segmentation; 3D-2D Registration; Full 3D Pose; Contour Detection; Fully Automatic.},
  abstract     = {The problem of segmenting a given image into coherent regions
                  is important in Computer Vision and many industrial
                  applications require segmenting a known object into its
                  components. Examples include identifying individual parts of a
                  component for process control work in a manufacturing plant and
                  identifying parts of a car from a photo for automatic damage
                  detection. Unfortunately most of an object's parts of interest
                  in such applications share the same pixel characteristics,
                  having similar colour and texture. This makes segmenting the
                  object into its components a non-trivial task for conventional
                  image segmentation algorithms. In this paper, we propose a
                  ``Model Assisted Segmentation'' method to tackle this problem. A
                  3D model of the object is registered over the given image by
                  optimising a novel gradient based loss function. This
                  registration obtains the full 3D pose from an image of the
                  object. The image can have an arbitrary view of the object and
                  is not limited to a particular set of views. The segmentation
                  is subsequently performed using a level-set based method, using
                  the projected contours of the registered 3D model as
                  initialisation curves. The method is fully automatic and
                  requires no user interaction. Also, the system does not require
                  any prior training. We present our results on photographs of a
                  real car.},
  support      = {ControlExpert GmbH},
  for          = {080104(50%),080106(50%)},
  znote        = {Acceptance rate: 42/200 = 21\% (oral).},
}
@InProceedings{Hutter:11losspose,
  author =       "Srimal Jayawardena and Marcus Hutter and Nathan Brewer",
  title =        "A Novel Illumination-Invariant Loss for Monocular 3D Pose Estimation",
  booktitle =    "Proc. 13th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'11})",
  pages =        "37--44",
  _editor =       "Andrew Bradley and Paul Jackway",
  publisher =    "IEEE Xplore",
  address =      "Noosa, Australia",
  _month =        dec,
  year =         "2011",
  bibtex =       "http://www.hutter1.net/official/bib.htm#losspose",
  url =          "http://arxiv.org/abs/1011.1035",
  pdf =          "http://www.hutter1.net/publ/losspose.pdf",
  latex =        "http://www.hutter1.net/publ/losspose.zip",
  slides =       "http://www.hutter1.net/publ/slosspose.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#icar",
  doi =          "10.1109/DICTA.2011.15",
  isbn =         "978-1-4577-2006-2 or 978-0-7695-4588-2",
  keywords =     "illumination-invariant loss; 2D-3D pose estimation;
                  pixel-based; featureless; optimisation; 3D model; monocular.",
  abstract =     "The problem of identifying the 3D pose of a known object from a
                  given 2D image has important applications in Computer Vision.
                  Our proposed method of registering a 3D model of a known object
                  on a given 2D photo of the object has numerous advantages over
                  existing methods. It does not require prior training, knowledge
                  of the camera parameters, explicit point correspondences or
                  matching features between the image and model. Unlike
                  techniques that estimate a partial 3D pose (as in an overhead
                  view of traffic or machine parts on a conveyor belt), our
                  method estimates the complete 3D pose of the object. It works
                  on a single static image from a given view under varying and
                  unknown lighting conditions. For this purpose we derive a novel
                  illumination-invariant distance measure between the 2D photo
                  and projected 3D model, which is then minimised to find the
                  best pose parameters. Results for vehicle pose detection in
                  real photographs are presented.",
  support =      "ControlExpert GmbH",
  for =          "080104(50%),080106(50%)",
  znote =        "Acceptance rate: 42/200 = 21\% (oral)."
}
@inproceedings{Hutter:11uivnfl,
  author =       {Tor Lattimore and Marcus Hutter},
  title =        {No Free Lunch versus {O}ccam's Razor in Supervised Learning},
  booktitle =    {Proc. Solomonoff 85th Memorial Conference},
  address =      {Melbourne, Australia},
  series =       {LNAI},
  volume =       {7070},
  pages =        {223--235},
  _editor =      {David Dowe},
  publisher =    {Springer},
  _month =       nov,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#uivnfl},
  url =          {http://arxiv.org/abs/1111.3846},
  pdf =          {http://www.hutter1.net/publ/uivnfl.pdf},
  latex =        {http://www.hutter1.net/publ/uivnfl.zip},
  slides =       {http://www.hutter1.net/publ/suivnfl.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#ait},
  doi =          {10.1007/978-3-642-44958-1_17},
  issn =         {0302-9743},
  isbn =         {978-3-642-44957-4},
  keywords =     {Supervised Learning; Kolmogorov complexity; Occam's Razor; No Free Lunch.},
  abstract =     {The No Free Lunch theorems are often used to argue that domain
                  specific knowledge is required to design successful
                  algorithms. We use algorithmic information theory to argue the
                  case for a universal bias allowing an algorithm to succeed in
                  all interesting problem domains. Additionally, we give a new
                  algorithm for off-line classification, inspired by Solomonoff
                  induction, with good performance on all structured problems
                  under reasonable assumptions. This includes a proof of the
                  efficacy of the well-known heuristic of randomly selecting
                  training data in the hope of reducing misclassification rates.},
  support =      {ARC grant DP0988049},
  for =          {010404(20%),010405(20%),080198(60%)},
  seo =          {970108(100%)},
}
@inproceedings{Hutter:11aixiaxiom2,
  author =       {Peter Sunehag and Marcus Hutter},
  title =        {Principles of {S}olomonoff Induction and {AIXI}},
  booktitle =    {Proc. Solomonoff 85th Memorial Conference},
  address =      {Melbourne, Australia},
  series =       {LNAI},
  volume =       {7070},
  pages =        {386--398},
  _editor =      {David Dowe},
  publisher =    {Springer},
  _month =       nov,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#aixiaxiom2},
  url =          {http://arxiv.org/abs/1111.6117},
  pdf =          {http://www.hutter1.net/publ/aixiaxiom2.pdf},
  latex =        {http://www.hutter1.net/publ/aixiaxiom2.tex},
  slides =       {http://www.hutter1.net/publ/saixiaxiom2.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#uai},
  doi =          {10.1007/978-3-642-44958-1_30},
  issn =         {0302-9743},
  isbn =         {978-3-642-44957-4},
  keywords =     {computability; representation; rationality; Solomonoff induction.},
  abstract =     {We identify principles underlying Solomonoff Induction. Key
                  concepts are rationality, computability, indifference and time
                  consistency. Furthermore, we discuss extensions to the full AI
                  case to derive AIXI.},
  support =      {ARC grant DP0988049},
  for =          {080401(20%),010404(30%),080101(50%)},
  seo =          {970108(80%),970122(20%)},
}
@InProceedings{Hutter:11unipreq,
  author =       "Ian Wood and Peter Sunehag and Marcus Hutter",
  title =        "({N}on-)Equivalence of Universal Priors",
  booktitle =    "Proc. Solomonoff 85th Memorial Conference",
  address =      "Melbourne, Australia",
  series =       "LNAI",
  volume =       "7070",
  pages =        "417--425",
  _editor =       "David Dowe",
  publisher =    "Springer",
  _month =        nov,
  year =         "2011",
  bibtex =       "http://www.hutter1.net/official/bib.htm#unipreq",
  url =          "http://arxiv.org/abs/1111.3854",
  pdf =          "http://www.hutter1.net/publ/unipreq.pdf",
  latex =        "http://www.hutter1.net/publ/unipreq.tex",
  slides =       "http://www.hutter1.net/publ/sunipreq.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  doi =          "10.1007/978-3-642-44958-1_33",
  issn =         "0302-9743",
  isbn =         "978-3-642-44957-4",
  keywords =     "algorithmic information theory; universal induction; universal prior.",
  abstract =     "Ray Solomonoff invented the notion of universal induction
                  featuring an aptly termed ``universal'' prior probability
                  function over all possible computable environments. The
                  essential property of this prior was its ability to dominate
                  all other such priors. Later, Levin introduced another
                  construction --- a mixture of all possible priors or `universal
                  mixture'. These priors are well known to be equivalent up to
                  multiplicative constants. Here, we seek to clarify further the
                  relationships between these three characterisations of a
                  universal prior (Solomonoff's, universal mixtures, and
                  universally dominant priors). We see that the constructions
                  of Solomonoff and Levin define an identical class of priors,
                  while the class of universally dominant priors is strictly
                  larger. We provide some characterisation of the discrepancy.",
  support =      "ARC grant DP0988049",
  for =          "010404(40%),010405(30%),080401(30%)",
  seo =          "970108(100%)",
}
@inproceedings{Hutter:11asyoptag,
  author =       {Tor Lattimore and Marcus Hutter},
  title =        {Asymptotically Optimal Agents},
  booktitle =    {Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})},
  address =      {Espoo, Finland},
  series =       {LNAI},
  volume =       {6925},
  _editor =      {J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann},
  publisher =    {Springer},
  pages =        {368--382},
  _month =       oct,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#asyoptag},
  conf =         {http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html},
  url =          {http://arxiv.org/abs/1107.5537},
  pdf =          {http://www.hutter1.net/publ/asyoptag.pdf},
  latex =        {http://www.hutter1.net/publ/asyoptag.tex},
  slides =       {http://www.hutter1.net/publ/sasyoptag.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#uai},
  doi =          {10.1007/978-3-642-24412-4_29},
  issn =         {0302-9743},
  isbn =         {3-642-24411-4},
  keywords =     {Rational agents; sequential decision theory;
                  artificial general intelligence; reinforcement learning;
                  asymptotic optimality; general discounting.},
  abstract =     {Artificial general intelligence aims to create agents capable
                  of learning to solve arbitrary interesting problems. We define
                  two versions of asymptotic optimality and prove that no agent
                  can satisfy the strong version while in some cases, depending
                  on discounting, there does exist a non-computable weak
                  asymptotically optimal agent.},
  support =      {ARC grant DP0988049},
  for =          {080101(100%)},
  znote =        {Acceptance rate: 28/61 = 46\%},
}
@inproceedings{Hutter:11evenbits,
  author =       {Tor Lattimore and Marcus Hutter and Vaibhav Gavane},
  title =        {Universal Prediction of Selected Bits},
  booktitle =    {Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})},
  address =      {Espoo, Finland},
  series =       {LNAI},
  volume =       {6925},
  _editor =      {J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann},
  publisher =    {Springer},
  pages =        {262--276},
  _month =       oct,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#evenbits},
  conf =         {http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html},
  url =          {http://arxiv.org/abs/1107.5531},
  pdf =          {http://www.hutter1.net/publ/evenbits.pdf},
  latex =        {http://www.hutter1.net/publ/evenbits.tex},
  slides =       {http://www.hutter1.net/publ/sevenbits.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#ait},
  doi =          {10.1007/978-3-642-24412-4_22},
  issn =         {0302-9743},
  isbn =         {3-642-24411-4},
  keywords =     {Sequence prediction; Solomonoff induction;
                  online classification; discriminative learning;
                  algorithmic information theory.},
  abstract =     {Many learning tasks can be viewed as sequence prediction
                  problems. For example, online classification can be converted
                  to sequence prediction with the sequence being pairs of
                  input/target data and where the goal is to correctly predict
                  the target data given input data and previous input/target
                  pairs. Solomonoff induction is known to solve the general
                  sequence prediction problem, but only if the entire sequence is
                  sampled from a computable distribution. In the case of
                  classification and discriminative learning though, only the
                  targets need be structured (given the inputs). We show that the
                  normalised version of Solomonoff induction can still be used in
                  this case, and more generally that it can detect any recursive
                  sub-pattern (regularity) within an otherwise completely
                  unstructured sequence. It is also shown that the unnormalised
                  version can fail to predict very simple recursive sub-patterns.},
  support =      {ARC grant DP0988049},
  for =          {080401(40%),010404(30%),010405(30%)},
  znote =        {Acceptance rate: 28/61 = 46\%},
}
@inproceedings{Hutter:11tcdisc,
  author =       {Tor Lattimore and Marcus Hutter},
  title =        {Time Consistent Discounting},
  booktitle =    {Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})},
  address =      {Espoo, Finland},
  series =       {LNAI},
  volume =       {6925},
  _editor =      {J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann},
  publisher =    {Springer},
  pages =        {383--397},
  _month =       oct,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#tcdisc},
  conf =         {http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html},
  url =          {http://arxiv.org/abs/1107.5528},
  pdf =          {http://www.hutter1.net/publ/tcdisc.pdf},
  latex =        {http://www.hutter1.net/publ/tcdisc.tex},
  slides =       {http://www.hutter1.net/publ/stcdisc.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#rl},
  doi =          {10.1007/978-3-642-24412-4_30},
  issn =         {0302-9743},
  isbn =         {3-642-24411-4},
  keywords =     {Rational agents; sequential decision theory;
                  general discounting; time-consistency; game theory.},
  abstract =     {A possibly immortal agent tries to maximise its summed
                  discounted rewards over time, where discounting is used to
                  avoid infinite utilities and encourage the agent to value
                  current rewards more than future ones. Some commonly used
                  discount functions lead to time-inconsistent behavior where the
                  agent changes its plan over time. These inconsistencies can
                  lead to very poor behavior. We generalise the usual discounted
                  utility model to one where the discount function changes with
                  the age of the agent. We then give a simple characterisation of
                  time-(in)consistent discount functions and show the existence
                  of a rational policy for an agent that knows its discount
                  function is time-inconsistent.},
  for =          {010405(20%),080101(40%),140104(20%),170202(20%)},
  seo =          {970108(40%),970114(30%),970117(30%)},
  znote =        {Acceptance rate: 28/61 = 46\%},
}
@InProceedings{Hutter:11aixiaxiom,
  author =       "Peter Sunehag and Marcus Hutter",
  title =        "Axioms for Rational Reinforcement Learning",
  booktitle =    "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
  address =      "Espoo, Finland",
  series =       "LNAI",
  volume =       "6925",
  _editor =       "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
  publisher =    "Springer",
  pages =        "338--352",
  _month =        oct,
  year =         "2011",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixiaxiom",
  conf =         "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
  url =          "http://arxiv.org/abs/1107.5520",
  pdf =          "http://www.hutter1.net/publ/aixiaxiom.pdf",
  latex =        "http://www.hutter1.net/publ/aixiaxiom.tex",
  slides =       "http://www.hutter1.net/publ/saixiaxiom.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  doi =          "10.1007/978-3-642-24412-4_27",
  issn =         "0302-9743",
  isbn =         "3-642-24411-4",
  keywords =     "Rationality; Probability; Utility; Banach Space; Linear Functional.",
  abstract =     "We provide a formal, simple and intuitive theory of rational
                  decision making including sequential decisions that affect the
                  environment. The theory has a geometric flavor, which makes the
                  arguments easy to visualize and understand. Our theory is for
                  complete decision makers, which means that they have a complete set
                  of preferences. Our main result shows that a complete rational
                  decision maker implicitly has a probabilistic model of the
                  environment. We have a countable version of this result that brings
                  light on the issue of countable vs finite additivity by showing how
                  it depends on the geometry of the space which we have preferences
                  over. This is achieved through fruitfully connecting rationality
                  with the Hahn-Banach Theorem. The theory presented here can be
                  viewed as a formalization and extension of the betting odds
                  approach to probability of Ramsey (1931) and De Finetti (1937).",
  support =      "ARC grant DP0988049",
  for =          "080401(20%),010404(30%),080101(50%)",
  znote =        "Acceptance rate: 28/61 = 46\%",
}
@proceedings{Hutter:11ewrlproc,
  editor =       {Scott Sanner and Marcus Hutter},
  title =        {European Workshop on Reinforcement Learning},
  subtitle =     {9th European Workshop ({EWRL'11})},
  publisher =    {Springer},
  address =      {Athens, Greece},
  series =       {LNAI},
  volume =       {7188},
  _month =       sep,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#ewrlproc11},
  http =         {http://www.springer.com/computer/ai/book/978-3-642-29945-2},
  pdf =          {http://www.hutter1.net/publ/ewrlproc11.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#rl},
  issn =         {0302-9743},
  isbn =         {978-3-642-29945-2},
  doi =          {10.1007/978-3-642-29946-9},
  keywords =     {artificial intelligence; machine learning;
                  reinforcement learning; Markov decision process;
                  function approximation; action; reward; observation; policy; agent.},
  abstract =     {This book constitutes revised and selected papers of the 9th
                  European Workshop on Reinforcement Learning, EWRL 2011, which
                  took place in Athens, Greece in September 2011. The papers
                  presented were carefully reviewed and selected from 40
                  submissions. The papers are organized in topical sections
                  online reinforcement learning, learning and exploring MDPs,
                  function approximation methods for reinforcement learning,
                  macro-actions in reinforcement learning, policy search and
                  bounds, multi-task and transfer reinforcement learning,
                  multi-agent reinforcement learning, apprenticeship and inverse
                  reinforcement learning and real-world reinforcement learning.},
  for =          {080101(50%),080198(50%)},
}
@InProceedings{Hutter:11frlexp,
  author =       "Phuong Nguyen and Peter Sunehag and Marcus Hutter",
  title =        "Feature Reinforcement Learning in Practice",
  booktitle =    "Proc. 9th European Workshop on Reinforcement Learning ({EWRL-9})",
  series =       "LNAI",
  volume =       "7188",
  pages =        "66--77",
  publisher =    "Springer",
  _month =        sep,
  year =         "2011",
  bibtex =       "http://www.hutter1.net/official/bib.htm#frlexp",
  url =          "http://arxiv.org/abs/1108.3614",
  pdf =          "http://www.hutter1.net/publ/frlexp.pdf",
  slides =       "http://www.hutter1.net/publ/sfrlexp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#rl",
  issn =         "0302-9743",
  isbn =         "978-3-642-29945-2",
  doi =          "10.1007/978-3-642-29946-9_10",
  keywords =     "Reinforcement learning; context Markov trees; Markov decision process;
                  partial observability; information \& complexity;
                  Monte Carlo search; Kuhn poker; rational agents.",
  abstract =     "Following a recent surge in using history-based methods for
                  resolving perceptual aliasing in reinforcement learning, we
                  introduce an algorithm based on the feature reinforcement
                  learning framework called $\Phi$MDP \cite{MH09c}. To create a
                  practical algorithm we devise a stochastic search procedure for
                  a class of context trees based on parallel tempering and a
                  specialized proposal distribution. We provide the first
                  empirical evaluation for $\Phi$MDP. Our proposed algorithm
                  achieves superior performance to the classical U-tree algorithm
                  \cite{AKM96} and the recent active-LZ algorithm \cite{Far10},
                  and is competitive with MC-AIXI-CTW \cite{VNHUS11} that
                  maintains a Bayesian mixture over all context trees up to a
                  chosen depth. We are encouraged by our ability to compete with
                  this sophisticated method using an algorithm that simply picks
                  one single model, and uses Q-learning on the corresponding MDP.
                  Our $\Phi$MDP algorithm is much simpler, yet consumes less time
                  and memory. These results show promise for our future work on
                  attacking more complex and larger problems.",
  support =      "ARC grant DP0988049",
  for =          "080401(30%),010405(20%),080101(50%)",
}
@article{Hutter:11uiphil,
  author =       {Samuel Rathmanner and Marcus Hutter},
  title =        {A Philosophical Treatise of Universal Induction},
  journal =      {Entropy},
  volume =       {13},
  number =       {6},
  pages =        {1076--1136},
  _month =       jun,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#uiphil},
  url =          {http://arxiv.org/abs/1105.5721},
  pdf =          {http://www.hutter1.net/publ/uiphil.pdf},
  latex =        {http://www.hutter1.net/publ/uiphil.zip},
  slides =       {http://www.hutter1.net/publ/suiphil.pdf},
  video1 =       {http://www.youtube.com/watch?v=gb4oXRsw3yA},
  video2 =       {http://www.youtube.com/watch?v=Q_cHUpwpdFo},
  video3 =       {https://www.youtube.com/watch?v=bn060on1hKs},
  project =      {http://www.hutter1.net/official/projects.htm#ait},
  doi =          {10.3390/e13061076},
  issn =         {1099-4300},
  keywords =     {sequence prediction; inductive inference; Bayes rule;
                  Solomonoff prior; Kolmogorov complexity; Occam's razor;
                  philosophical issues; confirmation theory; Black raven paradox.},
  abstract =     {Understanding inductive reasoning is a problem that
                  has engaged mankind for thousands of years. This problem is
                  relevant to a wide range of fields and is integral to the
                  philosophy of science. It has been tackled by many great minds
                  ranging from philosophers to scientists to mathematicians, and
                  more recently computer scientists. In this article we argue the
                  case for Solomonoff Induction, a formal inductive framework
                  which combines algorithmic information theory with the Bayesian
                  framework. Although it achieves excellent theoretical results
                  and is based on solid philosophical foundations, the requisite
                  technical knowledge necessary for understanding this framework
                  has caused it to remain largely unknown and unappreciated in
                  the wider scientific community. The main contribution of this
                  article is to convey Solomonoff induction and its related
                  concepts in a generally accessible form with the aim of
                  bridging this current technical gap. In the process we examine
                  the major historical contributions that have led to the
                  formulation of Solomonoff Induction as well as criticisms of
                  Solomonoff and induction in general. In particular we examine
                  how Solomonoff induction addresses many issues that have
                  plagued other inductive systems, such as the black ravens
                  paradox and the confirmation problem, and compare this approach
                  with other recent approaches.},
  for =          {080401(30%),010404(30%),170203(10%),220304(30%)},
  znote =        {Special Issue on Kolmogorov Complexity edited by Paul Vitanyi},
}
@incollection{Hutter:11randai,
  author =       {Marcus Hutter},
  title =        {Algorithmic Randomness as Foundation of Inductive Reasoning and Artificial Intelligence},
  booktitle =    {Randomness through Computation},
  subtitle =     {Some Answers, More Questions},
  chapter =      {12},
  _editor =      {H. Zenil},
  publisher =    {World Scientific},
  pages =        {159--169},
  _month =       feb,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#randai},
  url =          {http://arxiv.org/abs/1102.2468},
  pdf =          {http://www.hutter1.net/publ/randai.pdf},
  latex =        {http://www.hutter1.net/publ/randai.tex},
  project =      {http://www.hutter1.net/official/projects.htm#ait},
  isbn =         {981-4327-74-3},
  keywords =     {algorithmic information theory; individual randomness;
                  Ockham's razor; inductive reasoning; artificial intelligence.},
  abstract =     {This article is a brief personal account of the past, present,
                  and future of algorithmic randomness, emphasizing its role in
                  inductive inference and artificial intelligence. It is written
                  for a general audience interested in science and philosophy.
                  Intuitively, randomness is a lack of order or predictability.
                  If randomness is the opposite of determinism, then algorithmic
                  randomness is the opposite of computability. Besides many other
                  things, these concepts have been used to quantify Ockham's
                  razor, solve the induction problem, and define intelligence.},
  for =          {080401(40%),010405(20%),080101(10%),080199(30%)},
}
@incollection{Hutter:11unilearn,
  author =       {Marcus Hutter},
  title =        {Universal Learning Theory},
  booktitle =    {Encyclopedia of Machine Learning},
  pages =        {1001--1008},
  editor =       {C. Sammut and G. Webb},
  publisher =    {Springer},
  _month =       feb,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#unilearn},
  url =          {http://arxiv.org/abs/1102.2467},
  pdf =          {http://www.hutter1.net/publ/unilearn.pdf},
  latex =        {http://www.hutter1.net/publ/unilearn.tex},
  slides =       {http://www.hutter1.net/ai/susp.pdf},
  project =      {http://www.hutter1.net/official/projects.htm#ait},
  doi =          {10.1007/978-0-387-30164-8},
  isbn =         {978-0-387-30768-8},
  keywords =     {Algorithmic probability; Ray Solomonoff; induction;
                  prediction; decision; action; Turing machine;
                  Kolmogorov complexity; universal prior; Bayes' rule.},
  abstract =     {This encyclopedic article gives a mini-introduction into the
                  theory of universal learning, founded by Ray Solomonoff in the
                  1960s and significantly developed and extended in the last
                  decade. It explains the spirit of universal learning, but
                  necessarily glosses over technical subtleties.},
  support =      {ARC grant DP0988049},
  for =          {080401(30%),010405(30%),080198(40%)},
}
@article{Hutter:11aixictwx,
  author =       {Joel Veness and Kee Siong Ng and Marcus Hutter and William Uther and David Silver},
  title =        {A {M}onte-{C}arlo {AIXI} Approximation},
  journal =      {Journal of Artificial Intelligence Research},
  volume =       {40},
  pages =        {95--142},
  _publisher =   {AAAI Press},
  _month =       jan,
  year =         {2011},
  bibtex =       {http://www.hutter1.net/official/bib.htm#aixictwx},
  url =          {http://arxiv.org/abs/0909.0801},
  pdf =          {http://www.hutter1.net/publ/aixictwx.pdf},
  latex =        {http://www.hutter1.net/publ/aixictwx.zip},
  slides =       {http://www.hutter1.net/publ/saixictwx.pdf},
  award =        {http://www.jair.org/bestpaper.html},
  project =      {http://www.hutter1.net/official/projects.htm#uai},
  code =         {http://www.hutter1.net/publ/aixictwxcode.zip},
  doi =          {10.1613/jair.3125},
  issn =         {1076-9757},
  keywords =     {Reinforcement Learning (RL);
                  Context Tree Weighting (CTW);
                  Monte Carlo Tree Search (MCTS);
                  Upper Confidence bounds applied to Trees (UCT);
                  Partially Observable Markov Decision Process (POMDP);
                  Prediction Suffix Trees (PST).},
  abstract =     {This paper introduces a principled approach for the design of a
                  scalable general reinforcement learning agent. Our approach is
                  based on a direct approximation of AIXI, a Bayesian optimality
                  notion for general reinforcement learning agents. Previously,
                  it has been unclear whether the theory of AIXI could motivate
                  the design of practical algorithms. We answer this hitherto
                  open question in the affirmative, by providing the first
                  computationally feasible approximation to the AIXI agent. To
                  develop our approximation, we introduce a new Monte-Carlo Tree
                  Search algorithm along with an agent-specific extension to the
                  Context Tree Weighting algorithm. Empirically, we present a set
                  of encouraging results on a variety of stochastic and partially
                  observable domains. We conclude by proposing a number of
                  directions for future research.},
  support =      {ARC grant DP0988049},
  for =          {080401(20%),010404(20%),080101(60%)},
  note =         {Honorable Mention for the 2014 IJCAI-JAIR Best Paper Prize.},
}

%-------------Publications-of-Marcus-Hutter-2010--------------%

@InProceedings{Hutter:10ctoe,
  author =       "Marcus Hutter",
  title =        "Observer Localization in Multiverse Theories",
  booktitle =    "Proceedings of the Conference in Honour of Murray Gell-Mann's 80th Birthday",
  subtitle =     "Quantum Mechanics, Elementary Particles, Quantum Cosmology and Complexity",
  pages =        "638--645",
  _editor =      "H. Fritzsch and K. K. Phua",
  publisher =    "World Scientific",
  _month =        nov,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ctoe",
  pdf =          "http://www.hutter1.net/publ/ctoe.pdf",
  latex =        "http://www.hutter1.net/publ/ctoe.tex",
  slides =       "http://www.hutter1.net/publ/sctoe.pdf",
  video =        "http://pirsa.org/displayFlash.php?id=18040117",
  project =      "http://www.hutter1.net/official/projects.htm#physics",
  doi =          "10.1142/9789814335614_0069",
  isbn =         "9814335606",
  keywords =     "world models; observer localization; predictive power;
                  Ockham's razor; universal theories; computability.",
  abstract =     "The progression of theories suggested for our world, from ego- to
                  geo- to helio-centric models to universe and multiverse theories and
                  beyond, shows one tendency: The size of the described worlds
                  increases, with humans being expelled from their center to ever more
                  remote and random locations. If pushed too far, a potential theory
                  of everything (TOE) is actually more a theory of nothing (TON).
                  Indeed such theories have already been developed. I show that
                  including observer localization into such theories is necessary and
                  sufficient to avoid this problem. I develop a quantitative recipe to
                  identify TOEs and distinguish them from TONs and theories
                  in-between. This precisely shows what the problem is with some
                  recently suggested universal TOEs.",
  for =          "080401(70%),020103(30%)",
}
@TechReport{Hutter:10lorpc,
  author =       "Minh-Ngoc Tran and Marcus Hutter",
  title =        "Model Selection by Loss Rank for Classification and Unsupervised Learning",
  institution =  "NUS and ANU",
  address =      "Singapore and Australia",
  number =       "arXiv:1011.1379",
  pages =        "1--20",
  _month =        nov,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#lorpc",
  url =          "http://arxiv.org/abs/1011.1379",
  pdf =          "http://www.hutter1.net/ai/lorpc.pdf",
  latex =        "http://www.hutter1.net/ai/lorpc.zip",
  slides =       "http://www.hutter1.net/ai/slorp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#mdl",
  keywords =     "Classification; graphical models; loss rank principle; model selection.",
  abstract =     "Hutter (2007) recently introduced the loss rank principle
                  (LoRP) as a general-purpose principle for model selection. The
                  LoRP enjoys many attractive properties and deserves further
                  investigations. The LoRP has been well-studied for regression
                  framework in Hutter and Tran (2010). In this paper, we study
                  the LoRP for classification framework, and develop it further
                  for model selection problems in unsupervised learning where the
                  main interest is to describe the associations between input
                  measurements, like cluster analysis or graphical modelling.
                  Theoretical properties and simulation studies are presented.",
  for =          "080401(20%),010405(50%),080198(30%)",
  seo =          "970101(70%),970108(30%)",
}
@Proceedings{Hutter:10altproc,
  editor =       "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
  title =        "Algorithmic Learning Theory",
  subtitle =     "21st International Conference ({ALT'10})",
  publisher =    "Springer",
  address =      "Canberra, Australia",
  series =       "LNAI",
  volume =       "6331",
  _month =        oct,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#altproc10",
  http =         "http://www.springer.com/computer/ai/book/978-3-642-16107-0",
  pdf =          "http://www.hutter1.net/publ/altproc10.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#other",
  issn =         "0302-9743",
  isbn =         "978-3-642-16107-0",
  doi =          "10.1007/978-3-642-16108-7",
  keywords =     "statistical learning, grammatical inference, graph learning,
                  PAC learning, query learning, algorithmic teaching, online learning,
                  inductive inference, reinforcement learning, Kernel methods",
  abstract =     "The LNAI series reports state-of-the-art results in artificial
                  intelligence research, development, and education. This volume (LNAI
                  6331) contains research papers presented at the 21st International
                  Conference on Algorithmic Learning Theory (ALT 2010), which was held
                  in Canberra (Australia) during October 6-8, 2010. The main objective
                  of the conference was to provide an interdisciplinary forum for
                  high-quality talks with a strong theoretical background and
                  scientific interchange in areas such as statistical learning,
                  grammatical inference, graph learning, PAC learning, query learning,
                  algorithmic teaching, online learning, inductive inference,
                  reinforcement learning, Kernel methods. The conference was
                  co-located with the 13th International Conference on Discovery
                  Science (DS 2010). The volume includes 26 technical contributions
                  that were selected from 44 submissions, and five invited talks
                  presented to the audience of ALT and DS. Longer versions of the DS
                  invited papers are available in the proceedings of DS 2010.",
  for =          "080401(20%),010405(20%),080199(60%)",
  znote =        "Acceptance rate: 26/44 = 59\%",
}
@inproceedings{Hutter:10altintro,
  author       = {Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann},
  title        = {Algorithmic Learning Theory 2010: Editors' Introduction},
  booktitle    = {Proc. 21st International Conf. on Algorithmic Learning Theory ({ALT'10})},
  address      = {Canberra, Australia},
  series       = {LNAI},
  volume       = {6331},
  publisher    = {Springer},
  pages        = {1--10},
  _month       = oct,
  year         = {2010},
  bibtex       = {http://www.hutter1.net/official/bib.htm#altintro10},
  pdf          = {http://www.hutter1.net/publ/altintro10.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#other},
  issn         = {0302-9743},
  isbn         = {978-3-642-16107-0},
  doi          = {10.1007/978-3-642-16108-7_1},
  keywords     = {algorithmic learning theory, query models, online
                  learning, inductive inference, boosting, kernel methods, complexity
                  and learning, reinforcement learning, unsupervised learning,
                  grammatical inference, algorithmic forecasting.},
  abstract     = {Learning theory is an active research area that incorporates ideas,
                  problems, and techniques from a wide range of disciplines including
                  statistics, artificial intelligence, information theory, pattern
                  recognition, and theoretical computer science. The research reported
                  at the 21st International Conference on Algorithmic Learning Theory
                  (ALT 2010) ranges over areas such as query models, online learning,
                  inductive inference, boosting, kernel methods, complexity and
                  learning, reinforcement learning, unsupervised learning, grammatical
                  inference, and algorithmic forecasting. In this introduction we give
                  an overview of the five invited talks and the regular contributions
                  of ALT 2010.},
  for          = {080401(20%),010405(20%),080199(60%)},
}
@inproceedings{Hutter:10phimp,
  author       = {Peter Sunehag and Marcus Hutter},
  title        = {Consistency of Feature {M}arkov Processes},
  booktitle    = {Proc. 21st International Conf. on Algorithmic Learning Theory ({ALT'10})},
  address      = {Canberra, Australia},
  series       = {LNAI},
  volume       = {6331},
  publisher    = {Springer},
  pages        = {360--374},
  _month       = oct,
  year         = {2010},
  bibtex       = {http://www.hutter1.net/official/bib.htm#phimp},
  url          = {http://arxiv.org/abs/1007.2075},
  conf         = {http://www-alg.ist.hokudai.ac.jp/~thomas/ALT10/alt10.jhtml},
  pdf          = {http://www.hutter1.net/publ/phimp.pdf},
  latex        = {http://www.hutter1.net/publ/phimp.tex},
  slides       = {http://www.hutter1.net/publ/sphimp.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#rl},
  issn         = {0302-9743},
  isbn         = {978-3-642-16107-0},
  doi          = {10.1007/978-3-642-16108-7_29},
  keywords     = {Markov Process (MP); Hidden Markov Model (HMM);
                  Finite State Machine (FSM);
                  Probabilistic Deterministic Finite State Automata (PDFA);
                  Penalized Maximum Likelihood (PML);
                  ergodicity; asymptotic consistency; suffix trees; model selection;
                  learning; reduction; side information; reinforcement learning.},
  abstract     = {We are studying long term sequence prediction (forecasting). We
                  approach this by investigating criteria for choosing a compact
                  useful state representation. The state is supposed to summarize
                  useful information from the history. We want a method that is
                  asymptotically consistent in the sense it will provably eventually
                  only choose between alternatives that satisfy an optimality property
                  related to the used criterion. We extend our work to the case where
                  there is side information that one can take advantage of and,
                  furthermore, we briefly discuss the active setting where an agent
                  takes actions to achieve desirable outcomes.},
  support      = {ARC grant DP0988049},
  for          = {080401(30%),010405(30%),080101(20%),080198(20%)},
  znote        = {Acceptance rate: 26/44 = 59\%},
}
@article{Hutter:10ctoex,
  author       = {Marcus Hutter},
  title        = {A Complete Theory of Everything (will be subjective)},
  journal      = {Algorithms},
  volume       = {3},
  number       = {4},
  pages        = {329--350},
  _month       = sep,
  year         = {2010},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ctoex},
  url          = {http://arxiv.org/abs/0912.5434},
  pdf          = {http://www.hutter1.net/publ/ctoex.pdf},
  latex        = {http://www.hutter1.net/publ/ctoex.tex},
  slides       = {http://www.hutter1.net/publ/sctoe.pdf},
  video        = {http://pirsa.org/displayFlash.php?id=18040117},
  art          = {http://www.hutter1.net/publ/ctoel.jpg},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.3390/a3040329},
  issn         = {1999-4893},
  keywords     = {world models; observer localization; predictive power;
                  Ockham's razor; universal theories; inductive reasoning;
                  simplicity and complexity; universal self-sampling;
                  no-free-lunch; computability.},
  abstract     = {Increasingly encompassing models have been suggested for our world.
                  Theories range from generally accepted to increasingly speculative
                  to apparently bogus. The progression of theories from ego- to geo-
                  to helio-centric models to universe and multiverse theories and
                  beyond was accompanied by a dramatic increase in the sizes of the
                  postulated worlds, with humans being expelled from their center to
                  ever more remote and random locations. Rather than leading to a true
                  theory of everything, this trend faces a turning point after which
                  the predictive power of such theories decreases (actually to zero).
                  Incorporating the location and other capacities of the observer into
                  such theories avoids this problem and allows to distinguish
                  meaningful from predictively meaningless theories. This also leads
                  to a truly complete theory of everything consisting of a
                  (conventional objective) theory of everything plus a (novel
                  subjective) observer process. The observer localization is neither
                  based on the controversial anthropic principle, nor has it anything
                  to do with the quantum-mechanical observation process. The suggested
                  principle is extended to more practical (partial, approximate,
                  probabilistic, parametric) world models (rather than theories of
                  everything). Finally, I provide a justification of Ockham's razor,
                  and criticize the anthropic principle, the doomsday argument, the no
                  free lunch theorem, and the falsifiability dogma.},
  for          = {080401(70%),020103(30%)},
}
@TechReport{Hutter:10pdpx,
  author =       "Wray Buntine and Marcus Hutter",
  title =        "A {B}ayesian Review of the {P}oisson-{D}irichlet Process",
  institution =  "NICTA and ANU",
  address =      "Australia",
  number =       "arXiv:1007.0296",
  _month =        jul,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#pdpx",
  url =          "http://arxiv.org/abs/1007.0296",
  pdf =          "http://www.hutter1.net/publ/pdpx.pdf",
  latex =        "http://www.hutter1.net/publ/pdpx.zip",
  slides =       "http://www.hutter1.net/publ/spdp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#bayes",
  keywords =     "Pitman-Yor process; Dirichlet;
                  two-parameter Poisson-Dirichlet process;
                  Chinese Restaurant Process; Consistency;
                  (non)atomic distributions;
                  Bayesian interpretation.",
  abstract =     "The two parameter Poisson-Dirichlet process is also known as the
                  Pitman-Yor Process and related to the Chinese Restaurant Process, is
                  a generalisation of the Dirichlet Process, and is increasingly
                  being used for probabilistic modelling in discrete areas such as
                  language and images. This article reviews the theory of the
                  Poisson-Dirichlet process in terms of its consistency for
                  estimation, the convergence rates and the posteriors of data. This
                  theory has been well developed for continuous distributions (more
                  generally referred to as non-atomic distributions). This article
                  then presents a Bayesian interpretation of the Poisson-Dirichlet
                  process: it is a mixture using an improper and infinite dimensional
                  Dirichlet distribution. This interpretation requires technicalities
                  of priors, posteriors and Hilbert spaces, but conceptually, this
                  means we can understand the process as just another Dirichlet and
                  thus all its sampling properties fit naturally. Finally, this
                  article also presents results for the discrete case which is the
                  case seeing widespread use now in computer science, but which has
                  received less attention in the literature.",
  for =          "080404(50%),080405(50%)",
}
@inproceedings{Hutter:10aixictw,
  author       = {Joel Veness and Kee Siong Ng and Marcus Hutter and David Silver},
  title        = {Reinforcement Learning via {AIXI} Approximation},
  booktitle    = {Proc. 24th AAAI Conference on Artificial Intelligence},
  pages        = {605--611},
  _editor      = {Maria Fox and David Poole},
  publisher    = {AAAI Press},
  address      = {Atlanta, USA},
  _month       = jul,
  year         = {2010},
  bibtex       = {http://www.hutter1.net/official/bib.htm#aixictw},
  url          = {http://arxiv.org/abs/1007.2049},
  pdf          = {http://www.hutter1.net/publ/aixictw.pdf},
  latex        = {http://www.hutter1.net/publ/aixictw.zip},
  slides       = {http://www.hutter1.net/publ/saixictw.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  code         = {http://www.jveness.info/software/mc-aixi-src-1.0.zip},
  keywords     = {Reinforcement Learning (RL);
                  Context Tree Weighting (CTW);
                  Monte Carlo Tree Search (MCTS);
                  Upper Confidence bounds applied to Trees (UCT);
                  Partially Observable Markov Decision Process (POMDP);
                  Prediction Suffix Trees (PST).},
  abstract     = {This paper introduces a principled approach for the design of a
                  scalable general reinforcement learning agent. This approach is
                  based on a direct approximation of AIXI, a Bayesian optimality
                  notion for general reinforcement learning agents. Previously, it has
                  been unclear whether the theory of AIXI could motivate the design of
                  practical algorithms. We answer this hitherto open question in the
                  affirmative, by providing the first computationally feasible
                  approximation to the AIXI agent. To develop our approximation, we
                  introduce a Monte Carlo Tree Search algorithm along with an
                  agent-specific extension of the Context Tree Weighting algorithm.
                  Empirically, we present a set of encouraging results on a number of
                  stochastic, unknown, and partially observable domains.},
  support      = {ARC grant DP0988049},
  for          = {080401(20%),010404(20%),080101(60%)},
  znote        = {Acceptance rate: 264/982 = 27\%},
}
@Article{Hutter:10cnlohx,
  author =       "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
  title =        "An Integrated {B}ayesian Analysis of {LOH} and Copy Number Data",
  journal =      "BMC Bioinformatics",
  volume =       "11",
  number =       "321",
  pages =        "1--18",
  _month =        jun,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#cnlohx",
  http =         "http://www.biomedcentral.com/1471-2105/11/321",
  supplement =   "http://www.biomedcentral.com/imedia/1222342299388240/supp2.pdf",
  pdf =          "http://www.hutter1.net/publ/cnlohx.pdf",
  slides =       "http://www.hutter1.net/publ/scnloh.pdf",
  poster =       "http://www.hutter1.net/publ/pcnloh.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#big",
  code =         "http://www.biomedcentral.com/imedia/1280629245356661/supp1.zip",
  doi =          "10.1186/1471-2105-11-321",
  issn =         "1471-2105",
  keywords =     "Bayesian regression; piecewise constant function;
                  change point problem; DNA copy number estimation; LOH estimation",
  abstract =     "Background: Cancer and other disorders are due to genomic lesions.
                  SNP-microarrays are able to measure simultaneously both genotype and
                  copy number (CN) at several Single Nucleotide Polymorphisms (SNPs)
                  along the genome. CN is defined as the number of DNA copies, and the
                  normal is two, since we have two copies of each chromosome. The
                  genotype of a SNP is the status given by the nucleotides (alleles)
                  which are present on the two copies of DNA. It is defined homozygous
                  or heterozygous if the two alleles are the same or if they differ,
                  respectively. Loss of heterozygosity (LOH) is the loss of the
                  heterozygous status due to genomic events. Combining CN and LOH
                  data, it is possible to better identify different types of genomic
                  aberrations. For example, a long sequence of homozygous SNPs might
                  be caused by either the physical loss of one copy or a uniparental
                  disomy event (UPD), i.e. each SNP has two identical nucleotides both
                  derived from only one parent. In this situation, the knowledge of
                  the CN can help in distinguishing between these two events.
                    Results: To better identify genomic aberrations, we propose a method
                  (called gBPCR) which infers the type of aberration occurred, taking
                  into account all the possible influence in the microarray detection
                  of the homozygosity status of the SNPs, resulting from an altered CN
                  level. Namely, we model the distributions of the detected genotype,
                  given a specific genomic alteration and we estimate the parameters
                  involved on public reference datasets. The estimation is performed
                  similarly to the modified Bayesian Piecewise Constant Regression,
                  but with improved estimators for the detection of the breakpoints.
                  Using artificial and real data, we evaluate the quality of the
                  estimation of gBPCR and we also show that it outperforms other
                  well-known methods for LOH estimation.
                    Conclusions: We propose a method (gBPCR) for the estimation of both
                  LOH and CN aberrations, improving their estimation by integrating
                  both types of data and accounting for their relationships. Moreover,
                  gBPCR performed very well in comparison with other methods for LOH
                  estimation and the estimated CN lesions on real data have been
                  validated with another technique.",
  support =      "Swiss National Science Foundation grants 205321-112430 and 205320-121886/1;
                  Oncosuisse grants OCS-1939-8-2006 and OCS-02296-08-2008;
                  Cantone Ticino Ticino in rete grant;
                  Fondazione per la Ricerca e la Cura sui Linfomi (Lugano, Switzerland)",
  alt =          "Also talk at 10th ISBA and IWPACBB'09",
  for =          "010405(50%),060405(50%)",
}
@Book{Hutter:10agiproc,
  editor =       "Eric Baum and Marcus Hutter and Emanuel Kitzelmann",
  title =        "Artificial General Intelligence",
  subtitle =     "3rd Conference ({AGI'10}) in Memoriam Ray Solomonoff",
  publisher =    "Atlantis Press",
  address =      "Lugano, Switzerland",
  _month =        mar,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#agiproc10",
  http =         "http://www.atlantis-press.com/publications/aisr/AGI-10/",
  pdf =          "http://www.hutter1.net/ai/agifb10.pdf",
  pdffull =      "http://www.hutter1.net/ai/agiproc10.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  issn =         "1951-6851",
  isbn =         "978-90-78677-36-9",
  abstract =     "The Conference on Artificial General Intelligence is the only major
                  conference series devoted wholly and specifically to the creation of
                  AI systems possessing general intelligence at the human level and
                  ultimately beyond. Its third installation, AGI-10, was held in Lugano,
                  Switzerland, March 5-8, 2010, in Memoriam Ray Solomonoff (1926-2009),
                  pioneer of machine learning, founder of algorithmic probability theory,
                  and father of the universal theory of inductive inference.
                  The conference attracted 66 paper submissions of which
                  29 (i.e., 44\%) were accepted as full papers for presentation at the
                  conference. Additional 12 papers were included as short position papers.
                  The program also included a keynote address by the reinforcement learning
                  pioneer Richard Sutton, two post-conference workshops, and a number of
                  pre-conference tutorials on various topics related to AGI.",
  for =          "080101(50%),080199(50%)",
}
@Article{Hutter:10lorpx,
  author =       "Marcus Hutter and Minh-Ngoc Tran",
  title =        "Model Selection with the Loss Rank Principle",
  journal =      "Computational Statistics and Data Analysis",
  volume =       "54",
  publisher =    "Elsevier",
  pages =        "1288--1306",
  _month =        feb,
  year =         "2010",
  bibtex =       "http://www.hutter1.net/official/bib.htm#lorpx",
  url =          "http://arxiv.org/abs/1003.0516",
  pdf =          "http://www.hutter1.net/ai/lorpx.pdf",
  ps =           "http://www.hutter1.net/ai/lorpx.ps",
  latex =        "http://www.hutter1.net/ai/lorpx.zip",
  slides =       "http://www.hutter1.net/ai/slorp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#mdl",
  code =         "http://www.hutter1.net/ai/lorpcode.zip",
  doi =          "10.1016/j.csda.2009.11.015",
  issn =         "0167-9473",
  keywords =     "Model selection, loss rank principle,
                  non-parametric regression, classification,
                  general loss function, k nearest neighbors.",
  abstract =     "A key issue in statistics and machine learning is to automatically
                  select the ``right'' model complexity, e.g., the number of neighbors
                  to be averaged over in k nearest neighbor (kNN) regression or the
                  polynomial degree in regression with polynomials. We suggest a novel
                  principle - the Loss Rank Principle (LoRP) - for model selection in
                  regression and classification. It is based on the loss rank, which
                  counts how many other (fictitious) data would be fitted better. LoRP
                  selects the model that has minimal loss rank. Unlike most penalized
                  maximum likelihood variants (AIC, BIC, MDL), LoRP depends only on
                  the regression functions and the loss function. It works without a
                  stochastic noise model, and is directly applicable to any
                  non-parametric regressor, like kNN.",
  for =          "080401(20%),010405(80%)",
}

%-------------Publications-of-Marcus-Hutter-2009--------------%

@inproceedings{Hutter:09mdltvp,
  author       = {Marcus Hutter},
  title        = {Discrete {MDL} Predicts in Total Variation},
  booktitle    = {Advances in Neural Information Processing Systems 22 ({NIPS'09})},
  pages        = {817--825},
  _editor      = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
  publisher    = {Curran Associates},
  address      = {Cambridge, MA, USA},
  _month       = dec,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#mdltvp},
  url          = {http://arxiv.org/abs/0909.4588},
  pdf          = {http://www.hutter1.net/ai/mdltvp.pdf},
  ps           = {http://www.hutter1.net/ai/mdltvp.ps},
  latex        = {http://www.hutter1.net/ai/mdltvp.tex},
  slides       = {http://www.hutter1.net/ai/smdltvp.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#mdl},
  isbn         = {1615679111},
  keywords     = {minimum description length; countable model class;
                  total variation distance; sequence prediction;
                  discriminative learning; reinforcement learning.},
  abstract     = {The Minimum Description Length (MDL) principle selects the model
                  that has the shortest code for data plus model. We show that for a
                  countable class of models, MDL predictions are close to the true
                  distribution in a strong sense. The result is completely general. No
                  independence, ergodicity, stationarity, identifiability, or other
                  assumption on the model class need to be made. More formally, we
                  show that for any countable class of models, the distributions
                  selected by MDL (or MAP) asymptotically predict (merge
                  with) the true measure in the class in total variation distance.
                  Implications for non-i.i.d. domains like time-series forecasting,
                  discriminative learning, and reinforcement learning are discussed.},
  for          = {080401(30%),010405(50%),080198(20%)},
  znote        = {Acceptance rate: 263/1105 = 24\%},
}
@inproceedings{Hutter:09wheel,
  author       = {Marcus Hutter and Nathan Brewer},
  title        = {Matching 2-D Ellipses to 3-D Circles with Application to Vehicle Pose Estimation},
  booktitle    = {Proc. 24th Conf. on Image and Vision Computing New Zealand ({IVCNZ'09})},
  pages        = {153--158},
  _editor      = {Donald Bailey},
  publisher    = {IEEE Xplore},
  address      = {Wellington, New Zealand},
  _month       = nov,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#wheel},
  url          = {http://arxiv.org/abs/0912.3589},
  pdf          = {http://www.hutter1.net/ai/wheel.pdf},
  latex        = {http://www.hutter1.net/ai/wheel.zip},
  slides       = {http://www.hutter1.net/ai/swheel.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#icar},
  code         = {http://www.hutter1.net/ai/wheelcode.zip},
  doi          = {10.1109/IVCNZ.2009.5378421},
  issn         = {2151-2205},
  keywords     = {computer vision; image recognition/processing; ellipse detection; 3d models;
                  2d-ellipse to 3d-circle matching; single image pose identification;
                  wheel detection; 3d vehicle models.},
  abstract     = {Finding the three-dimensional representation of all or a part of a
                  scene from a single two dimensional image is a challenging task. In
                  this paper we propose a method for identifying the pose and location
                  of objects with circular protrusions in three dimensions from a
                  single image and a 3d representation or model of the object of
                  interest. To do this, we present a method for identifying ellipses
                  and their properties quickly and reliably with a novel technique
                  that exploits intensity differences between objects and a geometric
                  technique for matching an ellipse in 2d to a circle in 3d.
                  We apply these techniques to the specific problem of determining the
                  pose and location of vehicles, particularly cars, from a single
                  image. We have achieved excellent pose recovery performance on
                  artificially generated car images and show promising results on real
                  vehicle images. We also make use of the ellipse detection method to
                  identify car wheels from images, with a very high successful match
                  rate.},
  support      = {ControlExpert GmbH},
  znote        = {Acceptance rate: 79/142 = 56\%},
}
@Article{Hutter:09mbpcrcode,
  author =       "Paola M. V. Rancoita and Marcus Hutter",
  title =        "mBPCR: A Package for DNA Copy Number Profile Estimation",
  journal =      "BioConductor -- Open Source Software for BioInformatics",
  number =       "0.99",
  pages =        "1--25",
  _month =        oct,
  year =         "2009",
  bibtex =       "http://www.hutter1.net/official/bib.htm#mbpcrcode",
  url =          "http://www.bioconductor.org/packages/devel/bioc/html/mBPCR.html",
  pdf =          "http://www.hutter1.net/ai/mbpcrcode.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#big",
  code =         "http://www.hutter1.net/ai/mbpcrcode.tar.gz",
  keywords =     "Bayesian regression, exact polynomial algorithm, piecewise constant function,
                  mBPCR, DNA copy number estimation, micro arrays, genomic aberrations, R package.",
  abstract =     "The algorithm mBPCR is a tool for estimating the profile of the
                  log2ratio of copy number data. The procedure is a Bayesian piecewise
                  constant regression and can be applied, generally, to estimate any
                  piecewise constant function (like the log2ratio of the copy number
                  data). The algorithm has been implemented in R and integrated into
                  bioconductor, an open source software for bioinformatics. This
                  document describes how to use the mBPCR bioconductor package in
                  general and on several examples.",
  support =      "SNF grant 205321-112430",
}
@article{Hutter:09phimdpx,
  author       = {Marcus Hutter},
  title        = {Feature Reinforcement Learning: Part {I}: Unstructured {MDP}s},
  journal      = {Journal of Artificial General Intelligence},
  volume       = {1},
  pages        = {3--24},
  _month       = oct,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#phimdpx},
  url          = {http://arxiv.org/abs/0906.1713},
  pdf          = {http://www.hutter1.net/ai/phimdpx.pdf},
  ps           = {http://www.hutter1.net/ai/phimdpx.ps},
  latex        = {http://www.hutter1.net/ai/phimdpx.tex},
  slides       = {http://www.hutter1.net/ai/sphimdp.pdf},
  video        = {http://www.vimeo.com/7390883},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  issn         = {1946-0163},
  keywords     = {Reinforcement learning; Markov decision process;
                  partial observability; feature learning; explore-exploit;
                  information \& complexity; rational agents.},
  abstract     = {General-purpose, intelligent, learning agents cycle through
                  sequences of observations, actions, and rewards that are complex,
                  uncertain, unknown, and non-Markovian. On the other hand,
                  reinforcement learning is well-developed for small finite state
                  Markov decision processes (MDPs). Up to now, extracting the right
                  state representations out of bare observations, that is, reducing
                  the general agent setup to the MDP framework, is an art that
                  involves significant effort by designers. The primary goal of this
                  work is to automate the reduction process and thereby significantly
                  expand the scope of many existing reinforcement learning algorithms
                  and the agents that employ them. Before we can think of mechanizing
                  this search for suitable MDPs, we need a formal objective criterion.
                  The main contribution of this article is to develop such a
                  criterion. I also integrate the various parts into one learning
                  algorithm. Extensions to more realistic dynamic Bayesian networks
                  are developed in Part II. The role of POMDPs is also considered there.},
}
  @Article{Hutter:09phidbnx,
    author =       "Marcus Hutter",
    title =        "Feature Reinforcement Learning: Part {II}: Structured {MDP}s",
    journal =      "Journal of Artificial General Intelligence",
    pages =        "71--86",
    _month =        jun,
    year =         "2009",
    bibtex =       "http://www.hutter1.net/official/bib.htm#phidbnx",
    pdf =          "http://www.hutter1.net/publ/phidbnx.pdf",
    slides =       "http://www.hutter1.net/publ/sphimdp.pdf",
    project =      "http://www.hutter1.net/official/projects.htm#uai",
    doi =          "10.2478/jagi-2021-0003",
    keywords =     "Reinforcement learning; dynamic Bayesian network; structure learning;
                    feature selection; global vs. local reward; explore-exploit;
                    information \& complexity; rational agents; partial observability",
    abstract =     "The Feature Markov Decision Processes (PhiMDP) model developed
                    in Part I is well-suited for learning agents in general environments.
                    Nevertheless, unstructured (Phi)MDPs are limited to relatively
                    simple environments. Structured MDPs like Dynamic Bayesian Networks
                    (DBNs) are used for large-scale real-world problems.
                    In this article I extend PhiMDP to PhiDBN.
                    The primary contribution is to derive a cost criterion that allows
                    to automatically extract the most relevant features from the
                    environment, leading to the ``best'' DBN representation.
                    I discuss all building blocks required for a complete general
                    learning algorithm, and compare the novel PhiDBN model to the
                    prevalent POMDP approach.",
  }
@Article{Hutter:09aixiopen,
  author =       "Marcus Hutter",
  title =        "Open Problems in Universal Induction \& Intelligence",
  journal =      "Algorithms",
  volume =       "2",
  number =       "3",
  pages =        "879--906",
  _month =        jul,
  year =         "2009",
  bibtex =       "http://www.hutter1.net/official/bib.htm#aixiopen",
  url =          "http://arxiv.org/abs/0907.0746",
  pdf =          "http://www.hutter1.net/ai/aixiopen.pdf",
  ps =           "http://www.hutter1.net/ai/aixiopen.ps",
  latex =        "http://www.hutter1.net/ai/aixiopen.tex",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  doi =          "10.3390/a2030879",
  issn =         "1999-4893",
  keywords =     "Kolmogorov complexity; information theory;
                  sequential decision theory; reinforcement learning;
                  artificial intelligence; universal Solomonoff induction;
                  rational agents.",
  abstract =     "Specialized intelligent systems can be found everywhere: finger
                  print, handwriting, speech, and face recognition, spam filtering,
                  chess and other game programs, robots, et al. This decade the first
                  presumably complete {\em mathematical} theory of artificial
                  intelligence based on universal induction-prediction-decision-action
                  has been proposed. This information-theoretic approach solidifies
                  the foundations of inductive inference and artificial intelligence.
                  Getting the foundations right usually marks a significant progress
                  and maturing of a field. The theory provides a gold standard and
                  guidance for researchers working on intelligent algorithms. The
                  roots of universal induction have been laid exactly half-a-century
                  ago and the roots of universal intelligence exactly one decade ago.
                  So it is timely to take stock of what has been achieved and what
                  remains to be done. Since there are already good recent surveys, I
                  describe the state-of-the-art only in passing and refer the reader
                  to the literature. This article concentrates on the open problems in
                  universal induction and its extension to universal intelligence.",
}
@InProceedings{Hutter:09cnloh,
  author =       "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
  title =        "Bayesian Joint Estimation of {CN} and {LOH} Aberrations",
  booktitle =    "Proc. 3rd International Workshop on Practical Applications of Computational Biology \& Bioinformatics ({IWPACBB'09})",
  volume =       "5518",
  series =       "LNCS",
  pages =        "1109--1117",
  _editor =       "S. Omatu et al.",
  publisher =    "Springer",
  address =      "Salamanca, Spain",
  _month =        jun,
  year =         "2009",
  url =          "http://iwpacbb.usal.es/",
  pdf =          "http://www.hutter1.net/publ/cnloh.pdf",
  slides =       "http://www.hutter1.net/publ/scnloh.pdf",
  poster =       "http://www.hutter1.net/publ/pcnloh.pdf",
  http =         "http://iwpacbb.usal.es/",
  doi =          "10.1007/978-3-642-02481-8_168",
  issn =         "0302-9743",
  isbn =         "978-3-642-02480-1",
  keywords =     "Bayesian regression; piecewise constant function;
                  change point problem; DNA copy number estimation; LOH estimation",
  abstract =     "SNP-microarrays are able to measure simultaneously both copy number
                  and genotype at several single nucleotide polymorphism positions.
                  Combining the two data, it is possible to better identify genomic
                  aberrations. For this purpose, we propose a Bayesian piecewise
                  constant regression which infers the type of aberration occurred,
                  taking into account all the possible influence in the microarray
                  detection of the genotype, resulting from an altered copy number
                  level. Namely, we model the distributions of the detected genotype
                  given a specific genomic alteration and we estimate the
                  hyper-parameters used on public reference datasets.",
  support =      "Swiss National Science Foundation grant 205321-112430;
                  Oncosuisse grants OCS-1939-8-2006 and OCS-02296-08-2008;
                  Cantone Ticino ``Ticino in rete'' grant;
                  Fondazione per la Ricerca e la Cura sui Linfomi (Lugano, Switzerland)",
}
@InProceedings{Hutter:09ldof,
  author =       "Ke Zhang and Marcus Hutter and Warren Jin",
  title =        "A New Local Distance-based Outlier Detection Approach for Scattered Real-World Data",
  booktitle =    "Proc. 13th Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD'09)",
  series =       "LNAI",
  volume =       "5467",
  pages =        "813--822",
  _editor =       "T. Theeramunkong and B. Kijsirikul and N. Cercone and H. T. Bao",
  publisher =    "Springer",
  address =      "Bangkok, Thailand",
  _month =        apr,
  year =         "2009",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ldof",
  url =          "http://arxiv.org/abs/0903.3257",
  pdf =          "http://www.hutter1.net/ai/ldof.pdf",
  ps =           "http://www.hutter1.net/ai/ldof.ps",
  latex =        "http://www.hutter1.net/ai/ldof.zip",
  slides =       "http://www.hutter1.net/ai/sldof.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#???",
  doi =          "10.1007/978-3-642-01307-2_84",
  issn =         "0302-9743",
  isbn =         "978-3-642-01306-5",
  keywords =     "local outlier; scattered data; k-distance; KNN; LOF; LDOF.",
  abstract =     "Detecting outliers which are grossly different from or inconsistent
                  with the remaining dataset is a major challenge in real-world KDD
                  applications. Existing outlier detection methods are ineffective on
                  scattered real-world datasets due to implicit data patterns and
                  parameter setting issues. We define a novel ``Local
                  Distance-based Outlier Factor'' (LDOF) to measure the outlier-ness
                  of objects in scattered datasets which addresses these issues. LDOF
                  uses the relative location of an object to its neighbours to
                  determine the degree to which the object deviates from its
                  neighbourhood.
                  Properties of LDOF are theoretically analysed including LDOF's lower
                  bound and its false-detection probability, as well as parameter
                  settings. In order to facilitate parameter settings in real-world
                  applications, we employ a top-n technique in our outlier detection
                  approach, where only the objects with the highest LDOF values are
                  regarded as outliers. Compared to conventional approaches (such as
                  top-n KNN and top-n LOF), our method top-n LDOF is more
                  effective at detecting outliers in scattered data. It is also easier
                  to set parameters, since its performance is relatively stable over a
                  large range of parameter values, as illustrated by experimental
                  results on both real-world and synthetic datasets.",
  znote =        "Acceptance rate: 111/338 = 33\%",
}
@article{Hutter:09alttcs,
  author       = {Marcus Hutter and Rocco A. Servedio},
  title        = {{ALT'07} Special Issue},
  journal      = {Theoretical Computer Science},
  _editor      = {Marcus Hutter and Rocco A. Servedio},
  volume       = {410},
  number       = {19},
  pages        = {1747--1748/1912},
  _month       = apr,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#alttcs},
  http         = {http://www.sciencedirect.com/science/journal/03043975/410/19},
  doi          = {10.1016/j.tcs.2009.01.008},
  issn         = {0304-3975},
  keywords     = {algorithmic learning theory, special issue, preface},
  abstract     = {This special issue contains expanded versions of papers that appeared in
                  preliminary form in the proceedings of the 18th International Conference
                  on Algorithmic Learning Theory (ALT 2007), which was held in Sendai,
                  Japan during October 1--4, 2007.  \emph{Algorithmic Learning Theory} is
                  a conference series which is dedicated to the theoretical study of the
                  algorithmic aspects of learning.  The best papers of the conference ALT 2007
                  were invited for this special issue and after a thorough reviewing process,
                  most of them qualified for this Special Issue on Algorithmic Learning Theory
                  of Theoretical Computer Science. The preface contains a short introduction
                  to each of these papers.},
}
@article{Hutter:09improbx,
  author       = {Alberto Piatti and Marco Zaffalon and Fabio Trojani and Marcus Hutter},
  title        = {Limits of Learning about a Categorical Latent Variable under Prior Near-Ignorance},
  journal      = {International Journal of Approximate Reasoning},
  volume       = {50},
  number       = {4},
  pages        = {597--611},
  _month       = apr,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#improbx},
  url          = {http://arxiv.org/abs/0904.4527},
  pdf          = {http://www.hutter1.net/ai/improbx.pdf},
  ps           = {http://www.hutter1.net/ai/improbx.ps},
  latex        = {http://www.hutter1.net/ai/improbx.tex},
  slides       = {http://www.hutter1.net/ai/simprob.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#robust},
  doi          = {10.1016/j.ijar.2008.08.003},
  issn         = {0888-613X},
  keywords     = {Near-ignorance set of priors; Latent variables; Imprecise Dirichlet model.},
  abstract     = {In this paper, we consider the coherent theory of (epistemic)
                  uncertainty of Walley, in which beliefs are represented through sets
                  of probability distributions, and we focus on the problem of
                  modeling prior ignorance about a categorical random variable. In
                  this setting, it is a known result that a state of prior ignorance
                  is not compatible with learning. To overcome this problem, another
                  state of beliefs, called \emph{near-ignorance}, has been proposed.
                  Near-ignorance resembles ignorance very closely, by satisfying some
                  principles that can arguably be regarded as necessary in a state of
                  ignorance, and allows learning to take place. What this paper does,
                  is to provide new and substantial evidence that also near-ignorance
                  cannot be really regarded as a way out of the problem of starting
                  statistical inference in conditions of very weak beliefs. The key to
                  this result is focusing on a setting characterized by a variable of
                  interest that is \emph{latent}. We argue that such a setting is by
                  far the most common case in practice, and we provide, for the case
                  of categorical latent variables (and general \emph{manifest}
                  variables) a condition that, if satisfied, prevents learning to take
                  place under prior near-ignorance. This condition is shown to be
                  easily satisfied even in the most common statistical problems. We
                  regard these results as a strong form of evidence against the
                  possibility to adopt a condition of prior near-ignorance in real
                  statistical problems.},
}
@techreport{Hutter:09bayestreex,
  author       = {Marcus Hutter},
  title        = {Exact Non-Parametric {B}ayesian Inference on Infinite Trees},
  number       = {0903.5342},
  institution  = {ARXIV},
  _month       = mar,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#bayestreex},
  url          = {http://arxiv.org/abs/0903.5342},
  pdf          = {http://www.hutter1.net/ai/bayestreex.pdf},
  ps           = {http://www.hutter1.net/ai/bayestreex.ps},
  latex        = {http://www.hutter1.net/ai/bayestreex.zip},
  slides       = {http://www.hutter1.net/ai/sbayestree.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#bayes},
  code         = {http://www.hutter1.net/ai/bayestree.c},
  keywords     = {Bayesian density estimation, exact linear time algorithm,
                  non-parametric inference, adaptive infinite tree, Polya tree,
                  scale invariance, consistency, asymptotics.},
  msc          = {62G07; 60B10; 68W99},
  abstract     = {Given i.i.d. data from an unknown distribution, we consider the
                  problem of predicting future items. An adaptive way to estimate
                  the probability density is to recursively subdivide the domain to
                  an appropriate data-dependent granularity. A Bayesian would assign
                  a data-independent prior probability to ``subdivide'', which leads
                  to a prior over infinite(ly many) trees. We derive an exact, fast,
                  and simple inference algorithm for such a prior, for the data
                  evidence, the predictive distribution, the effective model
                  dimension, moments, and other quantities. We prove asymptotic
                  convergence and consistency results, and illustrate the behavior
                  of our model on some prototypical functions.},
}
@book{Hutter:09agiproc,
  editor       = {Ben Goertzel and Pascal Hitzler and Marcus Hutter},
  title        = {Artificial General Intelligence},
  subtitle     = {2nd Conference ({AGI'09})},
  publisher    = {Atlantis Press},
  address      = {Arlington, USA},
  _month       = mar,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#agiproc09},
  http         = {http://www.atlantis-press.com/publications/aisr/AGI-09/},
  pdf          = {http://www.hutter1.net/ai/agifb09.pdf},
  pdfall       = {http://www.hutter1.net/ai/agiproc09.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  issn         = {1951-6851},
  isbn         = {978-90-78677-24-6},
  abstract     = {The Conference on Artificial General Intelligence is the only major
                  conference series devoted wholly and specifically to the creation of
                  AI systems possessing general intelligence at the human level and
                  ultimately beyond. Its second installation, AGI-09, in Arlington,
                  Virginia, March 6-9, 2009, attracted 67 paper submissions, which is
                  a substantial increase from the previous year. Of these submissions,
                  33 (i.e., 49\%) were accepted as full papers for presentation at the
                  conference. Additional 13 papers were included as position papers.
                  The program also included a keynote address by J{\"u}rgen
                  Schmidhuber on \emph{The New AI}, a post-conference workshop on
                  \emph{The Future of AI}, and a number of pre-conference tutorials on
                  various topics related to AGI.},
}
@inproceedings{Hutter:09phimdp,
  author       = {Marcus Hutter},
  title        = {Feature {M}arkov Decision Processes},
  booktitle    = {Proc. 2nd Conf. on Artificial General Intelligence ({AGI'09})},
  subtitle     = {Advances in Intelligent Systems Research},
  volume       = {8},
  pages        = {61--66},
  publisher    = {Atlantis Press},
  _address     = {Arlington, Virginia},
  _month       = mar,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#phimdp},
  url          = {http://arXiv.org/abs/0812.4580},
  pdf          = {http://www.hutter1.net/ai/phimdp.pdf},
  ps           = {http://www.hutter1.net/ai/phimdp.ps},
  latex        = {http://www.hutter1.net/ai/phimdp.tex},
  slides       = {http://www.hutter1.net/ai/sphimdp.pdf},
  video        = {http://www.vimeo.com/7390883},
  award        = {http://agi-conf.org/2009/kurzweilprize.php},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.2991/agi.2009.30},
  issn         = {1951-6851},
  isbn         = {978-90-78677-24-6},
  keywords     = {Reinforcement learning; Markov decision process;
                  partial observability; feature learning; explore-exploit.},
  abstract     = {General purpose intelligent learning agents cycle through
                  (complex,non-MDP) sequences of observations, actions, and rewards.
                  On the other hand, reinforcement learning is well-developed for
                  small finite state Markov Decision Processes (MDPs). So far it is an
                  art performed by human designers to extract the right state
                  representation out of the bare observations, i.e. to reduce the
                  agent setup to the MDP framework. Before we can think of mechanizing
                  this search for suitable MDPs, we need a formal objective criterion.
                  The main contribution of this article is to develop such a
                  criterion. I also integrate the various parts into one learning
                  algorithm. Extensions to more realistic dynamic Bayesian networks
                  are developed in a companion article.},
  znote        = {Acceptance rate: 33/67 = 49\%. First Runner-Up for the Kurzweil Best Paper Award},
}
@inproceedings{Hutter:09phidbn,
  author       = {Marcus Hutter},
  title        = {Feature Dynamic {B}ayesian Networks},
  booktitle    = {Proc. 2nd Conf. on Artificial General Intelligence ({AGI'09})},
  subtitle     = {Advances in Intelligent Systems Research},
  volume       = {8},
  pages        = {67--73},
  publisher    = {Atlantis Press},
  _address     = {Arlington, Virginia},
  _month       = mar,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#phidbn},
  url          = {http://arXiv.org/abs/0812.4581},
  pdf          = {http://www.hutter1.net/ai/phidbn.pdf},
  ps           = {http://www.hutter1.net/ai/phidbn.ps},
  latex        = {http://www.hutter1.net/ai/phidbn.tex},
  slides       = {http://www.hutter1.net/ai/sphimdp.pdf},
  video        = {http://www.vimeo.com/7390883},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.2991/agi.2009.6},
  issn         = {1951-6851},
  isbn         = {978-90-78677-24-6},
  keywords     = {Reinforcement learning; dynamic Bayesian network;
                  structure learning; feature learning;
                  global vs. local reward; explore-exploit.},
  abstract     = {Feature Markov Decision Processes (PhiMDPs) are well-suited for
                  learning agents in general environments. Nevertheless, unstructured
                  (Phi)MDPs are limited to relatively simple environments. Structured
                  MDPs like Dynamic Bayesian Networks (DBNs) are used for large-scale
                  real-world problems. In this article I extend PhiMDP to PhiDBN. The
                  primary contribution is to derive a cost criterion that allows to
                  automatically extract the most relevant features from the
                  environment, leading to the ``best'' DBN representation. I discuss all
                  building blocks required for a complete general learning algorithm.},
  znote        = {Acceptance rate: 33/67 = 49\%},
}
@article{Hutter:09idmx,
  author       = {Marcus Hutter},
  title        = {Practical Robust Estimators under the {I}mprecise {D}irichlet {M}odel},
  journal      = {International Journal of Approximate Reasoning},
  volume       = {50},
  number       = {2},
  pages        = {231--242},
  _month       = feb,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#idmx},
  url          = {http://arxiv.org/abs/0901.4137},
  pdf          = {http://www.hutter1.net/ai/idmx.pdf},
  ps           = {http://www.hutter1.net/ai/idmx.ps},
  latex        = {http://www.hutter1.net/ai/idmx.tex},
  slides       = {http://www.hutter1.net/ai/sidm.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#robust},
  doi          = {10.1016/j.ijar.2008.03.020},
  issn         = {0888-613X},
  keywords     = {Imprecise Dirichlet Model; exact, conservative, approximate,
                  robust, credible interval estimates; entropy; mutual
                  information.},
  abstract     = {Walley's Imprecise Dirichlet Model (IDM) for categorical i.i.d.\
                  data extends the classical Dirichlet model to a set of priors. It
                  overcomes several fundamental problems which other approaches to
                  uncertainty suffer from. Yet, to be useful in practice, one needs
                  efficient ways for computing the imprecise=robust sets or
                  intervals. The main objective of this work is to derive exact,
                  conservative, and approximate, robust and credible interval
                  estimates under the IDM for a large class of statistical
                  estimators, including the entropy and mutual information.},
}
@article{Hutter:09bcna,
  author       = {Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee},
  title        = {Bayesian {DNA} Copy Number Analysis},
  journal      = {BMC Bioinformatics},
  volume       = {10},
  number       = {10},
  pages        = {1--19},
  _month       = jan,
  year         = {2009},
  bibtex       = {http://www.hutter1.net/official/bib.htm#bcna},
  http         = {http://www.biomedcentral.com/1471-2105/10/10},
  supplement   = {http://www.biomedcentral.com/content/supplementary/1471-2105-10-10-s2.pdf},
  pdf          = {http://www.hutter1.net/ai/bcna.pdf},
  slides       = {http://www.hutter1.net/ai/sbcna.pdf},
  code         = {http://www.biomedcentral.com/content/supplementary/1471-2105-10-10-s1.zip},
  doi          = {10.1186/1471-2105-10-10},
  issn         = {1471-2105},
  keywords     = {Bayesian regression, exact polynomial algorithm, piecewise constant function,
                  mBPCR, DNA copy number estimation, micro arrays, genomic aberrations.},
  abstract     = {Background: Some diseases, like tumors, can be related to
                  chromosomal aberrations, leading to changes of DNA copy number. The
                  copy number of an aberrant genome can be represented as a piecewise
                  constant function, since it can exhibit regions of deletions or
                  gains. Instead, in a healthy cell the copy number is two because we
                  inherit one copy of each chromosome from each our parents. Bayesian
                  Piecewise Constant Regression (BPCR) is a Bayesian regression method
                  for data that are noisy observations of a piecewise constant
                  function. The method estimates the unknown segment number, the
                  endpoints of the segments and the value of the segment levels of the
                  underlying piecewise constant function. The Bayesian Regression
                  Curve (BRC) estimates the same data with a smoothing curve. However,
                  in the original formulation, some estimators failed to properly
                  determine the corresponding parameters. For example, the boundary
                  estimator did not take into account the dependency among the
                  boundaries and succeeded in estimating more than one breakpoint at
                  the same position, losing segments.
                  Results: We derived an improved version of the BPCR (called mBPCR)
                  and BRC, changing the segment number estimator and the boundary
                  estimator to enhance the fitting procedure. We also proposed an
                  alternative estimator of the variance of the segment levels, which
                  is useful in case of data with high noise. Using artificial data, we
                  compared the original and the modified version of BPCR and BRC with
                  other regression methods, showing that our improved version of BPCR
                  generally outperformed all the others. Similar results were also
                  observed on real data.
                  Conclusions: We propose an improved method for DNA copy number
                  estimation, mBPCR, which performed very well compared to previously
                  published algorithms. In particular, mBPCR was more powerful in the
                  detection of the true position of the breakpoints and of small
                  aberrations in very noisy data. Hence, from a biological point of
                  view, our method can be very useful, for example, to find targets of
                  genomic aberrations in clinical cancer samples.},
  support      = {SNF grant 205321-112430},
  znote        = {Marked as highly accessed.},
  alt          = {Also 2-page abstract and poster at 9th ISBA and 18th MASAMB meetings (2008)},
  abstract2p   = {http://www.hutter1.net/publ/bcnas.pdf},
  poster       = {http://www.hutter1.net/publ/sbcnas.pdf},
}

%-------------Publications-of-Marcus-Hutter-2008--------------%

@article{Hutter:08actoptx,
  author       = {Daniil Ryabko and Marcus Hutter},
  title        = {On the Possibility of Learning in Reactive Environments with Arbitrary Dependence},
  journal      = {Theoretical Computer Science},
  volume       = {405},
  number       = {3},
  pages        = {274--284},
  _month       = oct,
  year         = {2008},
  bibtex       = {http://www.hutter1.net/official/bib.htm#actoptx},
  url          = {http://arxiv.org/abs/0810.5636},
  pdf          = {http://www.hutter1.net/ai/actoptx.pdf},
  ps           = {http://www.hutter1.net/ai/actoptx.ps},
  latex        = {http://www.hutter1.net/ai/actoptx.tex},
  slides       = {http://www.hutter1.net/ai/sactopt.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  doi          = {10.1016/j.tcs.2008.06.039},
  issn         = {0304-3975},
  keywords     = {Reinforcement learning, asymptotic average value,
                  self-optimizing policies, (non) Markov decision processes.},
  abstract     = {We address the problem of reinforcement learning in which
                  observations may exhibit an arbitrary form of stochastic dependence
                  on past observations and actions, i.e.\ environments more general
                  than (PO)MDPs. The task for an agent is to attain the  best possible
                  asymptotic reward where the true generating environment is unknown
                  but belongs to a known countable family of environments. We find
                  some sufficient conditions on the class of  environments under which
                  an agent exists which attains the best asymptotic reward for any
                  environment in the class. We analyze how tight these conditions are
                  and how they relate to different probabilistic assumptions known in
                  reinforcement learning and related fields, such as Markov Decision
                  Processes and mixing conditions.},
  support      = {SNF grant 200020-107616},
}
% Conference paper presented at the Valencia/ISBA meeting in Benidorm (arXiv:0809.1270).
% The author name is stored in full, as in the other entries of this file.
@InProceedings{Hutter:08phi,
  author =       "Marcus Hutter",
  title =        "Predictive Hypothesis Identification",
  booktitle =    "Presented at 9th Valencia/ISBA 2010 Meeting",
  pages =        "1--16",
  address =      "Benidorm, Spain",
  _month =        sep,
  year =         "2008",
  bibtex =       "http://www.hutter1.net/official/bib.htm#phi",
  url =          "http://arxiv.org/abs/0809.1270",
  pdf =          "http://www.hutter1.net/ai/phi.pdf",
  ps =           "http://www.hutter1.net/ai/phi.ps",
  latex =        "http://www.hutter1.net/ai/phi.tex",
  slides =       "http://www.hutter1.net/ai/sphi.pdf",
  poster =       "http://www.hutter1.net/ai/pphi.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#mdl",
  keywords =     "parameter estimation; hypothesis testing; model selection;
                  predictive inference; composite hypotheses; MAP versus ML;
                  moment fitting; Bayesian statistics.",
  abstract =     "While statistics focusses on hypothesis testing and on
                  estimating (properties of) the true sampling distribution, in
                  machine learning the performance of learning algorithms on
                  future data is the primary issue. In this paper we bridge the
                  gap with a general principle (PHI) that identifies hypotheses
                  with best predictive performance. This includes predictive
                  point and interval estimation, simple and composite hypothesis
                  testing, (mixture) model selection, and others as special
                  cases. For concrete instantiations we will recover well-known
                  methods, variations thereof, and new ones. PHI nicely
                  justifies, reconciles, and blends (a reparametrization
                  invariant variation of) MAP, ML, MDL, and moment estimation.
                  One particular feature of PHI is that it can genuinely deal
                  with nested hypotheses.",
}
% Conference paper: CEC'08, IEEE (arXiv:0803.2925).
@InProceedings{Hutter:08select,
  author =       "Kassel Hingee and Marcus Hutter",
  title =        "Equivalence of Probabilistic Tournament and Polynomial Ranking Selection",
  booktitle =    "Proc. 2008 Congress on Evolutionary Computation ({CEC'08})",
  pages =        "564--571",
  publisher =    "IEEE",
  address =      "Hong Kong",
  isbn =         "978-1-4244-1823-7",
  _month =        jun,
  year =         "2008",
  bibtex =       "http://www.hutter1.net/official/bib.htm#select",
  url =          "http://arxiv.org/abs/0803.2925",
  pdf =          "http://www.hutter1.net/ai/select.pdf",
  ps =           "http://www.hutter1.net/ai/select.ps",
  latex =        "http://www.hutter1.net/ai/select.zip",
  slides =       "http://www.hutter1.net/ai/sselect.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#optimize",
  doi =          "10.1109/CEC.2008.4630852",
  keywords =     "evolutionary algorithms, ranking selection,
                  tournament selection, equivalence, efficiency.",
  abstract =     "Crucial to an Evolutionary Algorithm's performance is its selection
                  scheme. We mathematically investigate the relation between
                  polynomial rank and probabilistic tournament methods which are
                  (respectively) generalisations of the popular linear ranking and
                  tournament selection schemes. We show that every probabilistic
                  tournament is equivalent to a unique polynomial rank scheme. In
                  fact, we derived explicit operators for translating between these
                  two types of selection. Of particular importance is that most linear
                  and most practical quadratic rank schemes are probabilistic
                  tournaments.",
}
% Journal article: Applied Mathematics Letters 21(5), 2008.
@article{Hutter:08pquestx,
  author       = {Daniil Ryabko and Marcus Hutter},
  title        = {Predicting Non-Stationary Processes},
  journal      = {Applied Mathematics Letters},
  volume       = {21},
  number       = {5},
  pages        = {477--482},
  _month       = may,
  year         = {2008},
  bibtex       = {http://www.hutter1.net/official/bib.htm#pquestx},
  url          = {http://arxiv.org/abs/cs.LG/0606077},
  pdf          = {http://www.hutter1.net/ai/pquestx.pdf},
  ps           = {http://www.hutter1.net/ai/pquestx.ps},
  latex        = {http://www.hutter1.net/ai/pquestx.tex},
  slides       = {http://www.hutter1.net/ai/spquest.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#bayes},
  doi          = {10.1016/j.aml.2007.04.004},
  issn         = {0893-9659},
  keywords     = {sequence prediction, local absolute continuity,
                  non-stationary measures, average/expected criteria,
                  absolute/KL divergence, mixtures of measures.},
  abstract     = {Suppose we are given two probability measures on the set of
                  one-way infinite finite-alphabet sequences and consider the
                  question when one of the  measures predicts the other, that is,
                  when conditional probabilities  converge (in a certain sense) when
                  one of the measures is chosen to generate the sequence. This
                  question may be considered a refinement of the problem of sequence
                  prediction in its most general formulation: for a given  class of
                  probability measures, does there exist a measure which predicts
                  all of the measures in the class? To address this problem, we find
                  some conditions on local absolute continuity which are sufficient
                  for prediction and which generalize several different notions
                  which are known to be sufficient for prediction. We also formulate
                  some open questions to outline a direction for finding the
                  conditions on classes of measures for which prediction is
                  possible.},
  support      = {SNF grant 200020-107616},
}
% Scholarpedia article 3(1):2573, 2008; the article's web address is kept in the custom http field.
@article{Hutter:08kolmo,
  author       = {Marcus Hutter},
  title        = {Algorithmic Complexity},
  journal      = {Scholarpedia},
  volume       = {3},
  number       = {1},
  pages        = {2573},
  _month       = jan,
  year         = {2008},
  bibtex       = {http://www.hutter1.net/official/bib.htm#kolmo},
  http         = {http://www.scholarpedia.org/article/Algorithmic_Complexity},
  pdf          = {http://www.hutter1.net/ai/kolmo.pdf},
  ps           = {http://www.hutter1.net/ai/kolmo.ps},
  latex        = {http://www.hutter1.net/ai/kolmo.zip},
  slides       = {http://www.hutter1.net/ai/sintro2kc.pdf},
  video        = {http://pirsa.org/displayFlash.php?id=18040109},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  doi          = {10.4249/scholarpedia.2573},
  issn         = {1941-6016},
  keywords     = {algorithmic information theory,
                  prefix code, prefix Turing machine,
                  Universal Turing machine, Kolmogorov complexity,
                  plain complexity, prefix complexity.},
  abstract     = {The information content or complexity of an object can be measured
                  by the length of its shortest description. For instance the string
                  `01010101010101010101010101010101' has the short description ``16
                  repetitions of 01'', while `11001000011000011101111011101100'
                  presumably has no simpler description other than writing down the
                  string itself. More formally, the Algorithmic ``Kolmogorov''
                  Complexity (AC) of a string $x$ is defined as the length of the
                  shortest program that computes or outputs $x$, where the program is
                  run on some fixed reference universal computer.},
}

%-------------Publications-of-Marcus-Hutter-2007--------------%

% Conference paper: Advances in Neural Information Processing Systems 20, 2007.
@inproceedings{Hutter:07qlearn,
  author       = {Marcus Hutter and Shane Legg},
  title        = {Temporal Difference Updating without a Learning Rate},
  booktitle    = {Advances in Neural Information Processing Systems 20},
  pages        = {705--712},
  _editor      = {J.C. Platt and D. Koller and Y. Singer and S. Roweis},
  publisher    = {Curran Associates},
  address      = {Cambridge, MA, USA},
  _month       = dec,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#qlearn},
  url          = {http://arxiv.org/abs/0810.5631},
  pdf          = {http://www.hutter1.net/ai/qlearn.pdf},
  ps           = {http://www.hutter1.net/ai/qlearn.ps},
  latex        = {http://www.hutter1.net/ai/qlearn.zip},
  poster       = {http://www.hutter1.net/ai/sqlearn.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#rl},
  keywords     = {reinforcement learning; temporal difference;
                  eligibility trace; variational principle; learning rate.},
  abstract     = {We derive an equation for temporal difference learning from
                  statistical principles.  Specifically, we start with the variational
                  principle and then bootstrap to produce an updating rule for
                  discounted state value estimates.  The resulting equation is similar
                  to the standard equation for temporal difference learning with
                  eligibility traces, so called TD(lambda), however it lacks the
                  parameter alpha that specifies the learning rate.  In the place
                  of this free parameter there is now an equation for the learning
                  rate that is specific to each state transition.  We experimentally
                  test this new learning rule against TD(lambda) and find that it
                  offers superior performance in various settings.  Finally, we make
                  some preliminary investigations into how to extend our new temporal
                  difference algorithm to reinforcement learning.  To do this we
                  combine our update equation with both Watkins' Q(lambda) and
                  Sarsa(lambda) and find that it again offers superior performance
                  without a learning rate parameter.},
  for          = {080101(100%)},
  znote        = {Acceptance rate: 217/975 = 22\%},
}
% Book chapter in "50 Years of Artificial Intelligence" (LNAI 4850), 2007.
% The underscore-prefixed _editor field is not a standard field name, so styles
% ignore it; its names are still separated by " and " so that it parses as a
% proper name list if the underscore is ever removed.
@InProceedings{Hutter:07intest,
  author =       "Shane Legg and Marcus Hutter",
  title =        "Tests of Machine Intelligence",
  booktitle =    "50 Years of Artificial Intelligence",
  booksubtitle = "Essays Dedicated to the 50th Anniversary of Artificial Intelligence",
  address =      "Monte Verita, Switzerland",
  series =       "LNAI",
  volume =       "4850",
  _editor =      "M. Lungarella and F. Iida and J. Bongard and R. Pfeifer",
  pages =        "232--242",
  _month =        dec,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#intest",
  url =          "http://arxiv.org/abs/0712.3825",
  pdf =          "http://www.hutter1.net/ai/intest.pdf",
  ps =           "http://www.hutter1.net/ai/intest.ps",
  latex =        "http://www.hutter1.net/ai/intest.tex",
  poster =       "http://www.hutter1.net/ai/siors.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  press =        "http://www.hutter1.net/official/press.htm#mim",
  doi =          "10.1007/978-3-540-77296-5_22",
  issn =         "0302-9743",
  isbn =         "978-3-540-77295-8",
  keywords =     "Turing test and derivatives; Compression tests; Linguistic complexity;
                  Multiple cognitive abilities; Competitive games;
                  Psychometric tests; Smith's test; C-test; Universal intelligence",
  abstract =     "Although the definition and measurement of intelligence is clearly
                  of fundamental importance to the field of artificial intelligence,
                  no general survey of definitions and tests of machine intelligence
                  exists.  Indeed few researchers are even aware of alternatives to
                  the Turing test and its many derivatives.  In this paper we fill
                  this gap by providing a short survey of the many tests of machine
                  intelligence that have been proposed.",
  support =      "SNF grant 200020-107616",
}
% Journal article: Minds and Machines 17(4), 2007 (arXiv:0712.3329).
% The journal field carries the journal's full name, spelled with "and".
@Article{Hutter:07iorx,
  author =       "Shane Legg and Marcus Hutter",
  title =        "Universal Intelligence: A Definition of Machine Intelligence",
  volume =       "17",
  number =       "4",
  journal =      "Minds and Machines",
  pages =        "391--444",
  _month =        dec,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#iorx",
  url =          "http://arxiv.org/abs/0712.3329",
  pdf =          "http://www.hutter1.net/ai/iorx.pdf",
  ps =           "http://www.hutter1.net/ai/iorx.ps",
  latex =        "http://www.hutter1.net/ai/iorx.zip",
  poster =       "http://www.hutter1.net/ai/sior.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  press =        "http://www.hutter1.net/official/press.htm#mim",
  doi =          "10.1007/s11023-007-9079-x",
  issn =         "0924-6495",
  keywords =     "AIXI, complexity theory, intelligence,
                  theoretical foundations, Turing test,
                  intelligence tests/measures/definitions",
  abstract =     "A fundamental problem in artificial intelligence is that nobody really
                  knows what intelligence is.  The problem is especially acute when we
                  need to consider artificial systems which are significantly different
                  to humans.  In this paper we approach this problem in the following
                  way: We take a number of well known informal definitions of human
                  intelligence that have been given by experts, and extract their
                  essential features.  These are then mathematically formalised to
                  produce a general measure of intelligence for arbitrary machines.  We
                  believe that this equation formally captures the concept of machine
                  intelligence in the broadest reasonable sense.  We then show how this
                  formal definition is related to the theory of universal optimal
                  learning agents.  Finally, we survey the many other tests and
                  definitions of intelligence that have been proposed for machines.",
  support =      "SNF grant 200020-107616",
}
% Journal article: Bayesian Analysis 2(4), 2007; shares its arXiv URL with the
% proceedings entry Hutter:07pcreg.
@article{Hutter:07pcregx,
  author       = {Marcus Hutter},
  title        = {Exact {B}ayesian Regression of Piecewise Constant Functions},
  journal      = {Bayesian Analysis},
  volume       = {2},
  number       = {4},
  pages        = {635--664},
  _month       = dec,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#pcregx},
  url          = {http://arxiv.org/abs/math.ST/0606315},
  pdf          = {http://www.hutter1.net/ai/pcregx.pdf},
  ps           = {http://www.hutter1.net/ai/pcregx.ps},
  latex        = {http://www.hutter1.net/ai/pcregx.tex},
  slides       = {http://www.hutter1.net/ai/spcreg.pdf},
  award        = {http://bayesian.org/project/lindley-prize/},
  project      = {http://www.hutter1.net/official/projects.htm#bayes},
  code         = {http://www.hutter1.net/ai/cpcreg.zip},
  doi          = {10.1214/07-BA225},
  issn         = {1936-0975},
  keywords     = {Bayesian regression, exact polynomial algorithm,
                  non-parametric inference, piecewise constant function,
                  dynamic programming, change point problem.},
  abstract     = {We derive an exact and efficient Bayesian regression algorithm for
                  piecewise constant functions of unknown segment number, boundary
                  locations, and levels. The derivation works for any noise and segment
                  level prior, e.g.\ Cauchy which can handle outliers. We derive
                  simple but good estimates for the in-segment variance. We also
                  propose a Bayesian regression curve as a better way of smoothing
                  data without blurring boundaries. The Bayesian approach also allows
                  straightforward determination of the evidence, break probabilities
                  and error estimates, useful for model selection and significance and
                  robustness studies. We discuss the performance on synthetic and
                  real-world examples. Many possible extensions are discussed.},
  note         = {Lindley prize for innovative research in Bayesian statistics.},
}
% Proceedings volume of ALT'07 (LNAI 4754, Springer), edited by Hutter, Servedio and Takimoto.
@proceedings{Hutter:07altproc,
  editor       = {Marcus Hutter and Rocco A. Servedio and Eiji Takimoto},
  title        = {Algorithmic Learning Theory},
  subtitle     = {18th International Conference ({ALT'07})},
  publisher    = {Springer},
  address      = {Sendai, Japan},
  series       = {LNAI},
  volume       = {4754},
  _month       = oct,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#altproc07},
  http         = {http://www.springer.com/computer/ai/book/978-3-540-75224-0},
  pdf          = {http://www.hutter1.net/ai/altproc07.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#other},
  doi          = {10.1007/978-3-540-75225-7},
  issn         = {0302-9743},
  isbn         = {978-3-540-75224-0},
  keywords     = {algorithmic learning theory, query models, online
                  learning, inductive inference, boosting, kernel methods, complexity
                  and learning, reinforcement learning, unsupervised learning,
                  grammatical inference, algorithmic forecasting.},
  abstract     = {The LNAI series reports state-of-the-art results in artificial
                  intelligence research, development, and education. This volume (LNAI
                  4754) contains research papers presented at the 18th International
                  Conference on Algorithmic Learning Theory (ALT 2007), which was held
                  in Sendai (Japan) during October 1-4, 2007. The main objective of
                  the conference was to provide an interdisciplinary forum for
                  high-quality talks with a strong theoretical background and
                  scientific interchange in areas such as query models, online
                  learning, inductive inference, boosting, kernel methods, complexity
                  and learning, reinforcement learning, unsupervised learning,
                  grammatical inference, and algorithmic forecasting.  The conference
                  was co-located with the 10th International Conference on Discovery
                  Science (DS 2007). The volume includes 25 technical contributions
                  that were selected from 50 submissions, and five invited talks
                  presented to the audience of ALT and DS. Longer versions of the
                  DS invited papers are available in the proceedings of DS 2007.},
  znote        = {Acceptance rate: 25/50 = 50\%},
}
% Editors' introduction (pages 1--8) to the ALT'07 proceedings, LNAI 4754.
@inproceedings{Hutter:07altintro,
  author       = {Marcus Hutter and Rocco A. Servedio and Eiji Takimoto},
  title        = {Algorithmic Learning Theory 2007: Editors' Introduction},
  booktitle    = {Proc. 18th International Conf. on Algorithmic Learning Theory ({ALT'07})},
  address      = {Sendai, Japan},
  series       = {LNAI},
  volume       = {4754},
  publisher    = {Springer},
  pages        = {1--8},
  _month       = oct,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#altintro07},
  pdf          = {http://www.hutter1.net/ai/altintro07.pdf},
  ps           = {http://www.hutter1.net/ai/altintro07.ps},
  latex        = {http://www.hutter1.net/ai/altintro07.tex},
  project      = {http://www.hutter1.net/official/projects.htm#other},
  issn         = {0302-9743},
  isbn         = {3-540-75224-2},
  doi          = {10.1007/978-3-540-75225-7_1},
  keywords     = {algorithmic learning theory, query models, online
                  learning, inductive inference, boosting, kernel methods, complexity
                  and learning, reinforcement learning, unsupervised learning,
                  grammatical inference, algorithmic forecasting.},
  abstract     = {Learning theory is an active research area that incorporates ideas,
                  problems, and techniques from a wide range of disciplines including
                  statistics, artificial intelligence, information theory, pattern
                  recognition, and theoretical computer science. The research reported
                  at the 18th International Conference on Algorithmic Learning Theory
                  (ALT 2007) ranges over areas such as unsupervised learning,
                  inductive inference, complexity and learning, boosting and
                  reinforcement learning, query learning models, grammatical
                  inference, online learning and defensive forecasting, and kernel
                  methods. In this introduction we give an overview of the five
                  invited talks and the regular contributions of ALT 2007.},
}
% Journal article: Theoretical Computer Science 384(1), 2007 (arXiv:0709.1516).
% The abbreviation "i.e." in the abstract is followed by a control space so that
% LaTeX does not set a sentence-ending space after it.
@Article{Hutter:07uspx,
  author =       "Marcus Hutter",
  title =        "On Universal Prediction and {B}ayesian Confirmation",
  journal =      "Theoretical Computer Science",
  volume =       "384",
  number =       "1",
  pages =        "33--48",
  _month =        sep,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#uspx",
  url =          "http://arxiv.org/abs/0709.1516",
  pdf =          "http://www.hutter1.net/ai/uspx.pdf",
  ps =           "http://www.hutter1.net/ai/uspx.ps",
  latex =        "http://www.hutter1.net/ai/uspx.tex",
  slides =       "http://www.hutter1.net/ai/susp.pdf",
  poster =       "http://www.hutter1.net/ai/susps.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  doi =          "10.1016/j.tcs.2007.05.016",
  issn =         "0304-3975",
  keywords =     "Sequence prediction, Bayes, Solomonoff prior,
                  Kolmogorov complexity, Occam's razor, prediction bounds,
                  model classes, philosophical issues, symmetry principle,
                  confirmation theory, reparametrization invariance,
                  old-evidence/updating problem, (non)computable environments.",
  abstract =     "The Bayesian framework is a well-studied and successful framework
                  for inductive reasoning, which includes hypothesis testing and
                  confirmation, parameter estimation, sequence prediction,
                  classification, and regression. But standard statistical guidelines
                  for choosing the model class and prior are not always available or
                  fail, in particular in complex situations.
                  Solomonoff completed the Bayesian framework by providing a
                  rigorous, unique, formal, and universal choice for the model class
                  and the prior. We discuss in breadth how and in which sense
                  universal (non-i.i.d.) sequence prediction solves various
                  (philosophical) problems of traditional Bayesian sequence
                  prediction. We show that Solomonoff's model possesses many
                  desirable properties: Strong total and weak instantaneous bounds,
                  and in contrast to most classical continuous prior densities has
                  no zero p(oste)rior problem, i.e.\ can confirm universal
                  hypotheses, is reparametrization and regrouping invariant, and
                  avoids the old-evidence and updating problem. It even performs
                  well (actually better) in non-computable environments.",
}
% Journal article: Theoretical Computer Science 382(3), 2007 (arXiv:0708.2319).
% The name Martin-Loef is written with the same LaTeX umlaut escape in the
% title, the keywords and the abstract.
@Article{Hutter:07mlconvxx,
  author =       "Marcus Hutter and Andrej A. Muchnik",
  title =        "On Semimeasures Predicting {Martin-L{\"o}f} Random Sequences",
  journal =      "Theoretical Computer Science",
  volume =       "382",
  number =       "3",
  pages =        "247--261",
  _month =        sep,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#mlconvxx",
  url =          "http://arxiv.org/abs/0708.2319",
  pdf =          "http://www.hutter1.net/ai/mlconvxx.pdf",
  ps =           "http://www.hutter1.net/ai/mlconvxx.ps",
  latex =        "http://www.hutter1.net/ai/mlconvxx.tex",
  slides =       "http://www.hutter1.net/ai/smlconvx.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  doi =          "10.1016/j.tcs.2007.03.040",
  issn =         "0304-3975",
  keywords =     "Sequence prediction; Algorithmic Information Theory;
                  universal enumerable semimeasure; mixture distributions;
                  posterior convergence; Martin-L{\"o}f randomness;
                  quasimeasures.",
  abstract =     "Solomonoff's central result on induction is that the posterior of
                  a universal semimeasure M converges rapidly and with probability
                  1 to the true sequence generating posterior mu, if the latter is
                  computable. Hence, M is eligible as a universal sequence predictor
                  in case of unknown mu. Despite some nearby results and proofs in
                  the literature, the stronger result of convergence for all
                  (Martin-L{\"o}f) random sequences remained open. Such a convergence
                  result would be particularly interesting and natural, since
                  randomness can be defined in terms of M itself. We show that there
                  are universal semimeasures M which do not converge for all random
                  sequences, i.e.\ we give a partial negative answer to the open
                  problem. We also provide a positive answer for some non-universal
                  semimeasures. We define the incomputable measure D as a mixture
                  over all computable measures and the enumerable semimeasure W as a
                  mixture over all enumerable nearly-measures. We show that W
                  converges to D and D to mu on all random sequences. The Hellinger
                  distance measuring closeness of two distributions plays
                  a central role.",
  support =      "SNF grant 2100-67712 and RFBF grants N04-01-00427 and N02-01-22001",
}
% Scholarpedia article 2(8):2572, 2007; the article's web address is kept in the custom http field.
@Article{Hutter:07algprob,
  author =       "Marcus Hutter and Shane Legg and Paul M. B. Vit{\'a}nyi",
  title =        "Algorithmic Probability",
  journal =      "Scholarpedia",
  volume =       "2",
  number =       "8",
  pages =        "2572",
  _month =        aug,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#algprob",
  http =         "http://www.scholarpedia.org/article/Algorithmic_Probability",
  pdf =          "http://www.hutter1.net/ai/algprob.pdf",
  ps =           "http://www.hutter1.net/ai/algprob.ps",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  doi =          "10.4249/scholarpedia.2572",
  issn =         "1941-6016",
  keywords =     "algorithmic information theory,
                  algorithmic complexity,
                  discrete/continuous algorithmic probability,
                  Bayes, Occam, Epicurus,
                  applications, references",
  abstract =     "Algorithmic ``Solomonoff'' Probability (AP) assigns to objects an a
                  priori probability that is in some sense universal. This prior
                  distribution has theoretical applications in a number of areas,
                  including inductive inference theory and the time complexity
                  analysis of algorithms. Its main drawback is that it is not
                  computable and thus can only be approximated in practice.",
}
% Conference paper: ISIPTA'07, Prague (arXiv:0705.4312).
@inproceedings{Hutter:07improb,
  author       = {Alberto Piatti and Marco Zaffalon and Fabio Trojani and Marcus Hutter},
  title        = {Learning about a Categorical Latent Variable under Prior Near-Ignorance},
  booktitle    = {Proc. 5th International Symposium on
                  Imprecise Probability: Theories and Applications ({ISIPTA'07})},
  pages        = {357--364},
  _editor      = {G. de Cooman and J. Vejnarova and M. Zaffalon},
  publisher    = {Action M Agency},
  address      = {Prague, Czech Republic},
  _month       = jul,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#improb},
  url          = {http://arxiv.org/abs/0705.4312},
  pdf          = {http://www.hutter1.net/ai/improb.pdf},
  ps           = {http://www.hutter1.net/ai/improb.ps},
  latex        = {http://www.hutter1.net/ai/improb.tex},
  slides       = {http://www.hutter1.net/ai/simprob.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#robust},
  code         = {http://www.hutter1.net/ai/improb.cpp},
  isbn         = {978-80-86742-20-5},
  keywords     = {Prior near-ignorance, latent and manifest variables,
                  observational processes, vacuous beliefs, imprecise probabilities.},
  abstract     = {It is well known that complete prior ignorance is not compatible
                  with learning, at least in a coherent theory of (epistemic)
                  uncertainty. What is less widely known, is that there is a state
                  similar to full ignorance, that Walley calls \emph{near-ignorance},
                  that permits learning to take place. In this paper we provide new
                  and substantial evidence that also near-ignorance cannot be really
                  regarded as a way out of the problem of starting statistical
                  inference in conditions of very weak beliefs. The key to this result
                  is focusing on a setting characterized by a variable of interest
                  that is \emph{latent}. We argue that such a setting is by far the
                  most common case in practice, and we show, for the case of
                  categorical latent variables (and general \emph{manifest} variables)
                  that there is a sufficient condition that, if satisfied, prevents
                  learning to take place under prior near-ignorance. This condition is
                  shown to be easily satisfied in the most common statistical
                  problems.},
  znote        = {Acceptance rate: 48/70 = 68\%},
}
% Conference paper: ISBA 8th International Meeting on Bayesian Statistics, Benidorm;
% shares its arXiv URL with the journal entry Hutter:07pcregx.
@InProceedings{Hutter:07pcreg,
  author =       "Marcus Hutter",
  title =        "{B}ayesian Regression of Piecewise Constant Functions",
  booktitle =    "Proc. ISBA 8th International Meeting on Bayesian Statistics",
  address =      "Benidorm, Spain",
  _editor =       "J.M. Bernardo and M.J. Bayarri and J.O. Berger and
                  A.P. Dawid and D. Heckerman and A.F.M. Smith and M. West",
  publisher =    "Oxford University Press",
  pages =        "607--612",
  _month =        jul,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#pcreg",
  url =          "http://arxiv.org/abs/math.ST/0606315",
  pdf =          "http://www.hutter1.net/ai/pcreg.pdf",
  ps =           "http://www.hutter1.net/ai/pcreg.ps",
  latex =        "http://www.hutter1.net/ai/pcreg.tex",
  slides =       "http://www.hutter1.net/ai/spcreg.pdf",
  award =        "http://bayesian.org/project/lindley-prize/",
  project =      "http://www.hutter1.net/official/projects.htm#bayes",
  ccode =        "http://www.hutter1.net/ai/pcreg.cpp",
  rcode =        "http://www.hutter1.net/ai/cpcreg.zip",
  isbn =         "978-0-19-921465-5",
  abstract =     "We derive an exact and efficient Bayesian regression algorithm for
                  piecewise constant functions of unknown segment number, boundary
                  location, and levels. It works for any noise and segment level
                  prior, e.g.\ Cauchy which can handle outliers. We derive simple but
                  good estimates for the in-segment variance. We also propose a
                  Bayesian regression curve as a better way of smoothing data without
                  blurring boundaries. The Bayesian approach also allows
                  straightforward determination of the evidence, break probabilities
                  and error estimates, useful for model selection and significance and
                  robustness studies. We briefly mention the performance on synthetic
                  and real-world examples. The full version of the paper contains
                  detailed derivations, more motivation and discussion, the complete
                  algorithm, the experiments, and various extensions.",
  keywords =     "Bayesian regression, exact polynomial algorithm, non-parametric
                  inference, piecewise constant function, dynamic programming,
                  change point problem.",
  note =         "Lindley prize for innovative research in Bayesian statistics.",
  znote =        "Acceptance rate: 19/326 = 6\%.",
}
@inproceedings{Hutter:07pquest,
  author       = {Daniil Ryabko and Marcus Hutter},
  title        = {On Sequence Prediction for Arbitrary Measures},
  booktitle    = {Proc. IEEE International Symposium on Information Theory ({ISIT'07})},
  pages        = {2346--2350},
  _editor      = {A. Goldsmith and M. Medard and A. Shokrollahi and R. Zamir},
  publisher    = {IEEE},
  address      = {Nice, France},
  _month       = jun,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#pquest},
  url          = {http://arxiv.org/abs/cs.LG/0606077},
  pdf          = {http://www.hutter1.net/ai/pquest.pdf},
  ps           = {http://www.hutter1.net/ai/pquest.ps},
  latex        = {http://www.hutter1.net/ai/pquest.tex},
  slides       = {http://www.hutter1.net/ai/spquest.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#bayes},
  doi          = {10.1109/ISIT.2007.4557570},
  isbn         = {1-4244-1429-6},
  keywords     = {sequence prediction, local absolute continuity,
                  non-stationary measures, average/expected criteria,
                  absolute/KL divergence, mixtures of measures.},
  abstract     = {Suppose we are given two probability measures on the set of
                  one-way infinite finite-alphabet sequences. Consider the
                  question when one of the  measures predicts the other, that is,
                  when conditional probabilities  converge (in a certain sense), if
                  one of the measures is chosen to generate the sequence. This
                  question may be considered a refinement of the problem of sequence
                  prediction in its most general formulation: for a given  class of
                  probability measures, does there exist a measure which predicts
                  all of the measures in the class? To address this problem, we find
                  some conditions on local absolute continuity which are sufficient
                  for prediction and  generalize several different notions
                  that are known to be sufficient for prediction. We also formulate
                  some open questions to outline a direction for finding the
                  conditions on classes of measures for which prediction is
                  possible.},
  support      = {SNF grant 200020-107616},
}
@inproceedings{Hutter:07idefs,
  author       = {Shane Legg and Marcus Hutter},
  title        = {A Collection of Definitions of Intelligence},
  booktitle    = {Advances in Artificial General Intelligence: Concepts, Architectures and Algorithms},
  series       = {Frontiers in Artificial Intelligence and Applications},
  volume       = {157},
  pages        = {17--24},
  editor       = {B. Goertzel and P. Wang},
  publisher    = {IOS Press},
  address      = {Amsterdam, NL},
  _month       = jun,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#idefs},
  url          = {http://arxiv.org/abs/0706.3639},
  http         = {http://www.idsia.ch/~shane/intelligence.html},
  pdf          = {http://www.hutter1.net/ai/idefs.pdf},
  ps           = {http://www.hutter1.net/ai/idefs.ps},
  latex        = {http://www.hutter1.net/ai/idefs.tex},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  isbn         = {978-1-58603-758-1},
  issn         = {0922-6389},
  keywords     = {intelligence definitions, collective, psychologist,
                  artificial, universal},
  abstract     = {This chapter is a survey of a large number of informal definitions
                  of ``intelligence'' that the authors have collected over the years.
                  Naturally, compiling a complete list would be impossible as many
                  definitions of intelligence are buried deep inside articles and
                  books. Nevertheless, the 70-odd definitions presented here are, to
                  the authors' knowledge, the largest and most well referenced
                  collection there is.},
  support      = {SNF grant 200020-107616},
}
@InProceedings{Hutter:07lorp,
  author =       "Marcus Hutter",
  title =        "The Loss Rank Principle for Model Selection",
  booktitle =    "Proc. 20th Annual Conf. on Learning Theory ({COLT'07})",
  address =      "San Diego, USA",
  series =       "LNAI",
  volume =       "4539",
  _editor =       "N. Bshouty and C. Gentile",
  publisher =    "Springer",
  pages =        "589--603",
  _month =        jun,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#lorp",
  url =          "http://arxiv.org/abs/math.ST/0702804",
  pdf =          "http://www.hutter1.net/ai/lorp.pdf",
  ps =           "http://www.hutter1.net/ai/lorp.ps",
  latex =        "http://www.hutter1.net/ai/lorp.tex",
  slides =       "http://www.hutter1.net/ai/slorp.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#mdl",
  doi =          "10.1007/978-3-540-72927-3_42",
  issn =         "0302-9743",
  keywords =     "Model selection, loss rank principle,
                  non-parametric regression, classification,
                  general loss function, k nearest neighbors.",
  abstract =     "We introduce a new principle for model selection in regression and
                  classification. Many regression models are controlled by some
                  smoothness or flexibility or complexity parameter c, e.g. the number
                  of neighbors to be averaged over in k nearest neighbor (kNN)
                  regression or the polynomial degree in regression with polynomials.
                  Let f_D^c be the (best) regressor of complexity c on data D. A more
                  flexible regressor can fit more data D' well than a more rigid one.
                  If something (here small loss) is easy to achieve it's typically
                  worth less. We define the loss rank of f_D^c as the number of other
                  (fictitious) data D' that are fitted better by f_D'^c than D is
                  fitted by f_D^c. We suggest selecting the model complexity c that
                  has minimal loss rank (LoRP). Unlike most penalized maximum
                  likelihood variants (AIC,BIC,MDL), LoRP only depends on the
                  regression function and loss function. It works without a stochastic noise
                  model, and is directly applicable to any non-parametric regressor,
                  like kNN. In this paper we formalize, discuss, and motivate LoRP,
                  study it for specific regression problems, in particular linear
                  ones, and compare it to other model selection schemes.",
  znote =        "Acceptance rate: 41/92 = 45\%",
}
@article{Hutter:07ait,
  author       = {Marcus Hutter},
  title        = {Algorithmic Information Theory: a brief non-technical guide to the field},
  journal      = {Scholarpedia},
  volume       = {2},
  number       = {3},
  pages        = {2519},
  _month       = mar,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#ait},
  http         = {http://www.scholarpedia.org/article/Algorithmic_Information_Theory},
  url          = {http://arxiv.org/abs/cs.IT/0703024},
  pdf          = {http://www.hutter1.net/ai/ait.pdf},
  ps           = {http://www.hutter1.net/ai/ait.ps},
  latex        = {http://www.hutter1.net/ai/ait.zip},
  slides       = {http://www.hutter1.net/ai/sapplait.pdf},
  video1       = {https://pirsa.org/18040109},
  video2       = {http://www.youtu.be/WZvxAA8ZdD4},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  doi          = {10.4249/scholarpedia.2519},
  issn         = {1941-6016},
  keywords     = {Algorithmic information theory,
                  algorithmic ``Kolmogorov'' complexity,
                  algorithmic ``Solomonoff'' probability,
                  universal ``Levin'' search,
                  algorithmic ``Martin-Loef'' randomness,
                  applications, history, references, notation, nomenclature, map.},
  abstract     = {This article is a brief guide to the field of algorithmic
                  information theory (AIT), its underlying philosophy, and the most
                  important concepts. AIT arises by mixing information theory and
                  computation theory to obtain an objective and absolute notion of
                  information in an individual object, and in so doing gives rise to
                  an objective and robust notion of randomness of individual objects.
                  This is in contrast to classical information theory that is based on
                  random variables and communication, and has no bearing on
                  information and randomness of individual objects. After a brief
                  overview, the major subfields, applications, history, and a map of
                  the field are presented.},
}
@Article{Hutter:07postbndx,
  author =       "Alexey Chernov and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title =        "Algorithmic Complexity Bounds on Future Prediction Errors",
  journal =      "Information and Computation",
  volume =       "205",
  number =       "2",
  pages =        "242--261",
  _month =        feb,
  year =         "2007",
  bibtex =       "http://www.hutter1.net/official/bib.htm#postbndx",
  url =          "http://arxiv.org/abs/cs.LG/0701120",
  conf =         "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT05/alt05.jhtml",
  pdf =          "http://www.hutter1.net/ai/postbndx.pdf",
  ps =           "http://www.hutter1.net/ai/postbndx.ps",
  latex =        "http://www.hutter1.net/ai/postbndx.tex",
  slides =       "http://www.hutter1.net/ai/spostbnd.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  doi =          "10.1016/j.ic.2006.10.004",
  issn =         "0890-5401",
  keywords =     "Kolmogorov complexity, posterior bounds, online sequential prediction,
                  Solomonoff prior, monotone conditional complexity, total error,
                  future loss, randomness deficiency",
  abstract =     "We bound the future loss when predicting any (computably) stochastic
                  sequence online. Solomonoff finitely bounded the total deviation
                  of his universal predictor $M$ from the true distribution $\mu$ by
                  the algorithmic complexity of $\mu$. Here we assume we are at a
                  time $t>1$ and already observed $x=x_1...x_t$. We bound the future
                  prediction performance on $x_{t+1}x_{t+2}...$ by a new variant of
                  algorithmic complexity of $\mu$ given $x$, plus the complexity of
                  the randomness deficiency of $x$. The new complexity is monotone
                  in its condition in the sense that this complexity can only
                  decrease if the condition is prolonged. We also briefly discuss
                  potential generalizations to Bayesian model classes and to
                  classification problems.",
  support =      "SNF grant 2000-61847",
}
@incollection{Hutter:07aixigentle,
  author       = {Marcus Hutter},
  title        = {Universal Algorithmic Intelligence: A Mathematical Top$\rightarrow$Down Approach},
  booktitle    = {Artificial General Intelligence},
  _editor      = {B. Goertzel and C. Pennachin},
  publisher    = {Springer},
  address      = {Berlin},
  _series      = {Cognitive Technologies},
  pages        = {227--290},
  _month       = jan,
  year         = {2007},
  bibtex       = {http://www.hutter1.net/official/bib.htm#aixigentle},
  http         = {http://www.hutter1.net/ai/aixigentle.htm},
  url          = {http://arxiv.org/abs/cs.AI/0701125},
  pdf          = {http://www.hutter1.net/ai/aixigentle.pdf},
  ps           = {http://www.hutter1.net/ai/aixigentle.ps},
  latex        = {http://www.hutter1.net/ai/aixigentle.tex},
  slides       = {http://www.hutter1.net/ai/saixigentle.pdf},
  video        = {http://vimeo.com/14888930},
  project      = {http://www.hutter1.net/official/projects.htm#uai},
  press        = {http://www.hutter1.net/official/press.htm#uaibook},
  doi          = {10.1007/978-3-540-68677-4_8},
  isbn         = {3-540-23733-X},
  categories   = {I.2.   [Artificial Intelligence]},
  keywords     = {Artificial intelligence; algorithmic probability;
                  sequential decision theory; rational agents;
                  value function; Solomonoff induction;
                  Kolmogorov complexity; reinforcement learning;
                  universal sequence prediction; strategic games;
                  function minimization; supervised learning.},
  abstract     = {Decision theory formally solves the problem of rational agents in
                  uncertain worlds if the true environmental prior probability
                  distribution is known. Solomonoff's theory of universal induction
                  formally solves the problem of sequence prediction for unknown
                  prior distribution. We combine both ideas and get a parameter-free
                  theory of universal Artificial Intelligence. We give strong
                  arguments that the resulting AIXI model is the most intelligent
                  unbiased agent possible. We outline for a number of problem
                  classes, including sequence prediction, strategic games, function
                  minimization, reinforcement and supervised learning, how the AIXI
                  model can formally solve them. The major drawback of the AIXI
                  model is that it is uncomputable. To overcome this problem, we
                  construct a modified algorithm AIXI$tl$ that is still
                  effectively more intelligent than any other time $t$ and length $l$
                  bounded agent. The computation time of AIXI$tl$ is of the order $t
                  \cdot 2^l$. Other discussed topics are formal definitions of
                  intelligence order relations, the horizon problem and relations of
                  the AIXI theory to other AI approaches.},
}

%-------------Publications-of-Marcus-Hutter-2006--------------%

@article{Hutter:06unipriorx,
  author       = {Marcus Hutter},
  title        = {On Generalized Computable Universal Priors and their Convergence},
  journal      = {Theoretical Computer Science},
  volume       = {364},
  number       = {1},
  pages        = {27--41},
  _month       = nov,
  year         = {2006},
  bibtex       = {http://www.hutter1.net/official/bib.htm#unipriorx},
  url          = {http://arxiv.org/abs/cs.LG/0503026},
  pdf          = {http://www.hutter1.net/ai/unipriorx.pdf},
  ps           = {http://www.hutter1.net/ai/unipriorx.ps},
  latex        = {http://www.hutter1.net/ai/unipriorx.tex},
  slides       = {http://www.hutter1.net/ai/sunipriors.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  doi          = {10.1016/j.tcs.2006.07.039},
  issn         = {0304-3975},
  keywords     = {Sequence prediction; Algorithmic Information Theory;
                  Solomonoff's prior; universal probability;
                  mixture distributions; posterior convergence;
                  computability concepts; Martin-Loef randomness.},
  abstract     = {Solomonoff unified Occam's razor and Epicurus' principle of
                  multiple explanations to one elegant, formal, universal theory of
                  inductive inference, which initiated the field of algorithmic
                  information theory. His central result is that the posterior of
                  the universal semimeasure M converges rapidly to the true sequence
                  generating posterior mu, if the latter is computable. Hence, M is
                  eligible as a universal predictor in case of unknown mu. The first
                  part of the paper investigates the existence and convergence of
                  computable universal (semi)measures for a hierarchy of
                  computability classes: recursive, estimable, enumerable, and
                  approximable. For instance, M is known to be enumerable, but
                  not estimable, and to dominate all enumerable semimeasures. We
                  present proofs for discrete and continuous semimeasures. The
                  second part investigates more closely the types of convergence,
                  possibly implied by universality: in difference and in ratio, with
                  probability 1, in mean sum, and for Martin-Loef random sequences.
                  We introduce a generalized concept of randomness for individual
                  sequences and use it to exhibit difficulties regarding these
                  issues. In particular, we show that convergence fails (holds) on
                  generalized-random sequences in gappy (dense) Bernoulli classes.},
}
@article{Hutter:06fuo,
  author       = {Marcus Hutter and Shane Legg},
  title        = {Fitness Uniform Optimization},
  journal      = {IEEE Transactions on Evolutionary Computation},
  volume       = {10},
  number       = {5},
  pages        = {568--589},
  _month       = oct,
  year         = {2006},
  bibtex       = {http://www.hutter1.net/official/bib.htm#fuo},
  url          = {http://arxiv.org/abs/cs.NE/0610126},
  pdf          = {http://www.hutter1.net/ai/fuo.pdf},
  ps           = {http://www.hutter1.net/ai/fuo.ps},
  latex        = {http://www.hutter1.net/ai/fuo.zip},
  slides       = {http://www.hutter1.net/ai/sfuss.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#optimize},
  press        = {http://www.hutter1.net/official/press.htm#fuss},
  doi          = {10.1109/TEVC.2005.863127},
  issn         = {1089-778X},
  keywords     = {Evolutionary algorithms, fitness uniform selection scheme, fitness
                  uniform deletion scheme, preserve diversity, local optima, evolution,
                  universal similarity relation, correlated recombination, fitness tree
                  model, traveling salesman, set covering, satisfiability.},
  abstract     = {In evolutionary algorithms, the fitness of a population increases with
                  time by mutating and recombining individuals and by a biased selection
                  of more fit individuals. The right selection pressure is critical in
                  ensuring sufficient optimization progress on the one hand and in
                  preserving genetic diversity to be able to escape from local optima on
                  the other hand. Motivated by a universal similarity relation on the
                  individuals, we propose a new selection scheme, which is uniform in
                  the fitness values. It generates selection pressure toward sparsely
                  populated fitness regions, not necessarily toward higher fitness, as
                  is the case for all other selection schemes. We show analytically on a
                  simple example that the new selection scheme can be much more
                  effective than standard selection schemes.  We also propose a new
                  deletion scheme which achieves a similar result via deletion and show
                  how such a scheme preserves genetic diversity more effectively than
                  standard approaches.  We compare the performance of the new schemes to
                  tournament selection and random deletion on an artificial deceptive
                  problem and a range of NP-hard problems: traveling salesman, set
                  covering and satisfiability.},
}
@InProceedings{Hutter:06discount,
  author =       "Marcus Hutter",
  title =        "General Discounting versus Average Reward",
  booktitle =    "Proc. 17th International Conf. on Algorithmic Learning Theory ({ALT'06})",
  address =      "Barcelona, Spain",
  series =       "LNAI",
  volume =       "4264",
  _editor =       "Jos{\'e} L. Balc{\'a}zar and Phil Long and Frank Stephan",
  publisher =    "Springer",
  pages =        "244--258",
  _month =        oct,
  year =         "2006",
  bibtex =       "http://www.hutter1.net/official/bib.htm#discount",
  url =          "http://arxiv.org/abs/cs.LG/0605040",
  conf =         "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT06/alt06.jhtml",
  pdf =          "http://www.hutter1.net/ai/discount.pdf",
  ps =           "http://www.hutter1.net/ai/discount.ps",
  latex =        "http://www.hutter1.net/ai/discount.tex",
  slides =       "http://www.hutter1.net/ai/sdiscount.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#rl",
  issn =         "0302-9743",
  isbn =         "3-540-46649-5",
  doi =          "10.1007/11894841_21",
  keywords =     "reinforcement learning; average value;
                  discounted value; arbitrary environment;
                  arbitrary discount sequence; effective horizon;
                  increasing farsightedness; consistent behavior.",
  abstract =     "Consider an agent interacting with an environment in cycles. In
                  every interaction cycle the agent is rewarded for its performance.
                  We compare the average reward U from cycle 1 to m (average
                  value) with the future discounted reward V from cycle k to
                  infinity (discounted value). We consider essentially arbitrary
                  (non-geometric) discount sequences and arbitrary reward sequences
                  (non-MDP environments). We show that asymptotically U for
                  m->infinity and V for k->infinity are equal, provided both
                  limits exist. Further, if the effective horizon grows linearly
                  with k or faster, then existence of the limit of U implies
                  that the limit of V exists. Conversely, if the effective horizon
                  grows linearly with k or slower, then existence of the limit of
                  V implies that the limit of U exists.",
  znote =        "Acceptance rate: 24/53 = 45\%",
}
@InProceedings{Hutter:06actopt,
  author =       "Daniil Ryabko and Marcus Hutter",
  title =        "Asymptotic Learnability of Reinforcement Problems with Arbitrary Dependence",
  booktitle =    "Proc. 17th International Conf. on Algorithmic Learning Theory ({ALT'06})",
  address =      "Barcelona, Spain",
  series =       "LNAI",
  volume =       "4264",
  _editor =       "Jos{\'e} L. Balc{\'a}zar and Phil Long and Frank Stephan",
  publisher =    "Springer",
  pages =        "334--347",
  _month =        oct,
  year =         "2006",
  bibtex =       "http://www.hutter1.net/official/bib.htm#actopt",
  url =          "http://arxiv.org/abs/cs.LG/0603110",
  conf =         "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT06/alt06.jhtml",
  pdf =          "http://www.hutter1.net/ai/actopt.pdf",
  ps =           "http://www.hutter1.net/ai/actopt.ps",
  latex =        "http://www.hutter1.net/ai/actopt.tex",
  slides =       "http://www.hutter1.net/ai/sactopt.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#universal",
  press =        "http://www.hutter1.net/official/press.htm#universal",
  issn =         "0302-9743",
  isbn =         "3-540-46649-5",
  doi =          "10.1007/11894841_27",
  keywords =     "Reinforcement learning, asymptotic average value,
                  self-optimizing policies, (non) Markov decision processes.",
  abstract =     "We address the problem of reinforcement
                  learning in which observations may exhibit an arbitrary form of
                  stochastic dependence on past observations and actions,
                  i.e. environments more general than (PO)MDPs.
                  The task for an agent is to attain the best possible asymptotic
                  reward where the true generating environment is unknown but
                  belongs to a known countable family of environments. We find some
                  sufficient conditions on the class of environments under which an
                  agent exists which attains the best asymptotic reward for any
                  environment in the class. We analyze how tight these conditions
                  are and how they relate to different probabilistic assumptions
                  known in reinforcement learning and related fields, such as Markov
                  Decision Processes and mixing conditions.",
  znote =        "Acceptance rate: 24/53 = 45\%",
}
@Misc{Hutter:06hprize,
  author =       "Marcus Hutter",
  title =        "Human Knowledge Compression Prize",
  _month =        aug,
  year =         "2006/2020",
  bibtex =       "http://www.hutter1.net/official/bib.htm#hprize",
  project =      "http://www.hutter1.net/prize/index.htm",
  press =        "http://www.hutter1.net/official/press.htm#hprize",
  keywords =     "Wikipedia; artificial intelligence;
                  lossless data compression; 50'000€/500'000€.",
  abstract =     "Being able to compress well is closely related to intelligence.
                  While intelligence is a slippery concept, file sizes are hard
                  numbers. The intention of this prize is to give incentives for
                  advancing the field of Artificial Intelligence through the
                  compression of human knowledge. The better one can compress the
                  encyclopedia Wikipedia, the better one can predict; and being able
                  to predict well is key for being able to act intelligently.",
  note =         "open ended, http://prize.hutter1.net/",
  for =          "080401(80\%),080199(20\%)",
}
@Article{Hutter:06mdlspeedx,
  author =       "Jan Poland and Marcus Hutter",
  title =        "{MDL} Convergence Speed for {Bernoulli} Sequences",
  journal =      "Statistics and Computing",
  volume =       "16",
  number =       "2",
  pages =        "161--175",
  _month =        jun,
  year =         "2006",
  bibtex =       "http://www.hutter1.net/official/bib.htm#mdlspeedx",
  url =          "http://arxiv.org/abs/math.ST/0602505",
  pdf =          "http://www.hutter1.net/ai/mdlspeedx.pdf",
  ps =           "http://www.hutter1.net/ai/mdlspeedx.ps",
  latex =        "http://www.hutter1.net/ai/mdlspeedx.tex",
  slides =       "http://www.hutter1.net/ai/smdlspeed.pdf",
  slidesppt =    "http://www.hutter1.net/ai/smdlspeed.ppt",
  project =      "http://www.hutter1.net/official/projects.htm#mdl",
  issn =         "0960-3174",
  doi =          "10.1007/s11222-006-6746-3",
  keywords =     "MDL, Minimum Description Length, Convergence Rate,
                  Prediction, Bernoulli, Discrete Model Class.",
  abstract =     "The Minimum Description Length principle for online sequence
                  estimation/prediction in a proper learning setup is studied. If
                  the underlying model class is discrete, then the total expected
                  square loss is a particularly interesting performance measure: (a)
                  this quantity is finitely bounded, implying convergence with
                  probability one, and (b) it additionally specifies the convergence
                  speed. For MDL, in general one can only have loss bounds which are
                  finite but exponentially larger than those for Bayes mixtures. We
                  show that this is even the case if the model class contains only
                  Bernoulli distributions. We derive a new upper bound on the
                  prediction error for countable Bernoulli classes. This implies a
                  small bound (comparable to the one for Bayes mixtures) for certain
                  important model classes. We discuss the application to Machine
                  Learning tasks such as classification and hypothesis testing, and
                  generalization to countable classes of i.i.d. models.",
}
@inproceedings{Hutter:06usp,
  author       = {Marcus Hutter},
  title        = {On the Foundations of Universal Sequence Prediction},
  booktitle    = {Proc. 3rd Annual Conference on Theory and
                  Applications of Models of Computation ({TAMC'06})},
  volume       = {3959},
  series       = {LNCS},
  pages        = {408--420},
  _editor      = {J.-Y. Cai and S. B. Cooper and A. Li},
  publisher    = {Springer},
  _address     = {Beijing},
  _month       = may,
  year         = {2006},
  bibtex       = {http://www.hutter1.net/official/bib.htm#usp},
  url          = {http://arxiv.org/abs/cs.LG/0605009},
  conf         = {http://gcl.iscas.ac.cn/accl06/TAMC06_Home.htm},
  pdf          = {http://www.hutter1.net/ai/usp.pdf},
  ps           = {http://www.hutter1.net/ai/usp.ps},
  latex        = {http://www.hutter1.net/ai/usp.tex},
  slides       = {http://www.hutter1.net/ai/susp.pdf},
  poster       = {http://www.hutter1.net/ai/susps.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#ait},
  issn         = {0302-9743},
  isbn         = {3-540-34021-1},
  doi          = {10.1007/11750321_39},
  keywords     = {Sequence prediction, Bayes, Solomonoff prior,
                  Kolmogorov complexity, Occam's razor, prediction bounds,
                  model classes, philosophical issues, symmetry principle,
                  confirmation theory, reparametrization invariance,
                  old-evidence/updating problem, (non)computable environments.},
  abstract     = {Solomonoff completed the Bayesian framework by providing a
                  rigorous, unique, formal, and universal choice for the model class
                  and the prior. We discuss in breadth how and in which sense
                  universal (non-i.i.d.) sequence prediction solves various
                  (philosophical) problems of traditional Bayesian sequence
                  prediction. We show that Solomonoff's model possesses many
                  desirable properties: Fast convergence and strong bounds, and in
                  contrast to most classical continuous prior densities has no zero
                  p(oste)rior problem, i.e. can confirm universal hypotheses, is
                  reparametrization and regrouping invariant, and avoids the
                  old-evidence and updating problem. It even performs well (actually
                  better) in non-computable environments.},
  znote        = {Acceptance rate: 76/400 = 19\%},
  alt          = {Also 2-page abstract and poster at 9th ISBA World Meeting (2008)},
  abstract2p   = {http://www.hutter1.net/ai/usps.pdf},
}
@inproceedings{Hutter:06aixifoe,
  author       = {Jan Poland and Marcus Hutter},
  title        = {Universal Learning of Repeated Matrix Games},
  booktitle    = {Proc. 15th Annual Machine Learning Conf. of {B}elgium and {T}he {N}etherlands ({Benelearn'06})},
  pages        = {7--14},
  address      = {Ghent, Belgium},
  _editor      = {Yvan Saeys and Bernard De Baets and Elena Tsiporkova and Yves Van de Peer},
  xpublisher   = {},
  _month       = may,
  year         = {2006},
  isbn         = {90 382 0948 7},
  bibtex       = {http://www.hutter1.net/official/bib.htm#aixifoe},
  url          = {http://arxiv.org/abs/cs.LG/0508073},
  conf         = {http://bioinformatics.psb.ugent.be/benelearn2006/},
  pdf          = {http://www.hutter1.net/ai/aixifoe.pdf},
  ps           = {http://www.hutter1.net/ai/aixifoe.ps},
  latex        = {http://www.hutter1.net/ai/aixifoe.zip},
  slides       = {http://www.hutter1.net/ai/saixifoe.pdf},
  project      = {http://www.hutter1.net/official/projects.htm#expert},
  abstract     = {We study and compare the learning dynamics of two universal
                  learning algorithms, one based on Bayesian learning and the
                  other on prediction with expert advice. Both approaches have
                  strong asymptotic performance guarantees. When confronted with
                  the task of finding good long-term strategies in repeated
                  2 x 2 matrix games, they behave quite differently. We consider
                  the case where the learning algorithms are not even informed
                  about the game they are playing.},
}
@InProceedings{Hutter:06ior,
  author =       "Shane Legg and Marcus Hutter",
  title =        "A Formal Measure of Machine Intelligence",
  booktitle =    "Proc. 15th Annual Machine Learning Conf. of {B}elgium and {T}he {N}etherlands ({Benelearn'06})",
  pages =        "73--80",
  address =      "Ghent, Belgium",
  _editor =       "Yvan Saeys and Bernard De Baets and Elena Tsiporkova and Yves Van de Peer",
  _month =        may,
  year =         "2006",
  isbn =         "90 382 0948 7",
  bibtex =       "http://www.hutter1.net/official/bib.htm#ior",
  url =          "http://arxiv.org/abs/cs.AI/0605024",
  conf =         "http://bioinformatics.psb.ugent.be/benelearn2006/",
  pdf =          "http://www.hutter1.net/ai/ior.pdf",
  ps =           "http://www.hutter1.net/ai/ior.ps",
  latex =        "http://www.hutter1.net/ai/ior.zip",
  slides =       "http://www.hutter1.net/ai/sior.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  press =        "http://www.hutter1.net/official/press.htm#ior",
  abstract =     "A fundamental problem in artificial intelligence is that nobody really
                  knows what intelligence is.  The problem is especially acute when we
                  need to consider artificial systems which are significantly different
                  to humans.  In this paper we approach this problem in the following
                  way: We take a number of well known informal definitions of human
                  intelligence that have been given by experts, and extract their
                  essential features.  These are then mathematically formalised to
                  produce a general measure of intelligence for arbitrary machines.  We
                  believe that this measure formally captures the concept of machine
                  intelligence in the broadest reasonable sense.",
}
@InProceedings{Hutter:06robot,
  author =       "Viktor Zhumatiy and Faustino Gomez and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title =        "Metric State Space Reinforcement Learning for a Vision-Capable Mobile Robot",
  booktitle =    "Proc. 9th International Conf. on Intelligent Autonomous Systems ({IAS'06})",
  pages =        "272--281",
  _editor =       "Tamio Arai and Rolf Pfeifer and Tucker Balch and Hiroshi Yokoi",
  publisher =    "IOS Press",
  _month =        mar,
  year =         "2006",
  bibtex =       "http://www.hutter1.net/official/bib.htm#robot",
  url =          "http://arxiv.org/abs/cs.RO/0603023",
  conf =         "http://www.arai.pe.u-tokyo.ac.jp/IAS-9/",
  pdf =          "http://www.hutter1.net/ai/robot.pdf",
  ps =           "http://www.hutter1.net/ai/robot.ps",
  latex =        "http://www.hutter1.net/ai/robot.zip",
  slides =       "http://www.hutter1.net/ai/srobot.pdf",
  slidesppt =    "http://www.hutter1.net/ai/srobot.ppt",
  isbn =         "1-58603-595-9",
  keywords =     "reinforcement learning; mobile robots.",
  abstract =     "We address the problem of autonomously learning controllers for
                  vision-capable mobile robots. We extend McCallum's (1995)
                  Nearest-Sequence Memory algorithm to allow for general metrics
                  over state-action trajectories. We demonstrate the feasibility of
                  our approach by successfully running our algorithm on a real
                  mobile robot. The algorithm is novel and unique in that it (a)
                  explores the environment and learns directly on a mobile robot
                  without using a hand-made computer model as an intermediate step,
                  (b) does not require manual discretization of the sensor input
                  space, (c) works in piecewise continuous perceptual spaces, and
                  (d) copes with partial observability. Together this allows
                  learning from much less experience compared to previous methods.",
  znote =        "Acceptance rate: 112/146 = 77\%",
}
@Article{Hutter:06knapsack,
  author =       "Monaldo Mastrolilli and Marcus Hutter",
  title =        "Hybrid Rounding Techniques for Knapsack Problems",
  journal =      "Discrete Applied Mathematics",
  volume =       "154",
  number =       "4",
  pages =        "640--649",
  _month =        mar,
  year =         "2006",
  bibtex =       "http://www.hutter1.net/official/bib.htm#knapsack",
  url =          "http://arxiv.org/abs/cs.CC/0305002",
  pdf =          "http://www.hutter1.net/ai/knapsack.pdf",
  ps =           "http://www.hutter1.net/ai/knapsack.ps",
  latex =        "http://www.hutter1.net/ai/knapsack.tex",
  project =      "http://www.hutter1.net/official/projects.htm#optimize",
  issn =         "0166-218X",
  doi =          "10.1016/j.dam.2005.08.004",
  abstract =     "We address the classical knapsack problem and a variant in which an upper
                  bound is imposed on the number of items that can be selected. We show that
                  appropriate combinations of rounding techniques yield novel and powerful
                  ways of rounding. As an application of these techniques, we present faster
                  polynomial time approximation schemes that compute an approximate solution
                  of any fixed accuracy in linear time. These linear complexity bounds give a
                  substantial improvement of the best previously known polynomial bounds.",
}
@Article{Hutter:06unimdlx,
  author =       "Marcus Hutter",
  title =        "Sequential Predictions based on Algorithmic Complexity",
  journal =      "Journal of Computer and System Sciences",
  volume =       "72",
  number =       "1",
  pages =        "95--117",
  _month =        feb,
  year =         "2006",
  bibtex =       "http://www.hutter1.net/official/bib.htm#unimdlx",
  url =          "http://arxiv.org/abs/cs.IT/0508043",
  pdf =          "http://www.hutter1.net/ai/unimdlx.pdf",
  ps =           "http://www.hutter1.net/ai/unimdlx.ps",
  latex =        "http://www.hutter1.net/ai/unimdlx.tex",
  slides =       "http://www.hutter1.net/ai/sunimdl.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#mdl",
  issn =         "0022-0000",
  doi =          "10.1016/j.jcss.2005.07.001",
  keywords =     "Sequence prediction; Algorithmic Information Theory;
                  Solomonoff's prior; Monotone Kolmogorov Complexity;
                  Minimal Description Length; Convergence;
                  Self-Optimizingness",
  abstract =     "This paper studies sequence prediction based on the
                  monotone Kolmogorov complexity $\Km=-\lb m$, i.e.\ based on
                  universal MDL. $m$ is extremely close to Solomonoff's prior $M$,
                  the latter being an excellent predictor in deterministic as well
                  as probabilistic environments, where performance is measured in
                  terms of convergence of posteriors or losses. Despite this
                  closeness to $M$, it is difficult to assess the prediction quality
                  of $m$, since little is known about the closeness of their
                  posteriors, which are the important quantities for prediction.
                  We show that for deterministic computable environments, the
                  ``posterior'' and losses of $m$ converge, but rapid convergence
                  could only be shown on-sequence; the off-sequence behavior is
                  unclear. In probabilistic environments, neither the posterior nor
                  the losses converge, in general.",
}
@Proceedings{Hutter:06kcdagabs,
  editor =       "Marcus Hutter and Wolfgang Merkle and Paul M. B. Vit{\'a}nyi",
  title =        "Kolmogorov Complexity and Applications",
  number =       "06051",
  _month =        jan # "/" # aug,
  year =         "2006",
  series =       "Dagstuhl Seminar Proceedings",
  url1 =         "http://www.hutter1.net/dagstuhl/",
  url2 =         "http://drops.dagstuhl.de/portals/06051",
  url3 =         "http://drops.dagstuhl.de/opus/volltexte/2006/663",
  pdf =          "http://www.hutter1.net/dagstuhl/kcdagabs.pdf",
  ps =           "http://www.hutter1.net/dagstuhl/kcdagabs.ps",
  latex =        "http://www.hutter1.net/dagstuhl/kcdagabs.tex",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  issn =         "1862-4405",
  publisher =    "IBFI",
  _publisher =    "Internationales Begegnungs- und Forschungszentrum fuer Informatik (IBFI), Schloss Dagstuhl, Germany",
  address =      "Dagstuhl, Germany",
  keywords =     "Information theory, Kolmogorov Complexity, effective randomness,
                  algorithmic probability, recursion theory, computational complexity,
                  machine learning",
  abstract =     "From 29.01.06 to 03.02.06,
                  the Dagstuhl Seminar 06051 ``Kolmogorov Complexity and Applications''
                  was held in the International Conference and Research Center (IBFI),
                  Schloss Dagstuhl. During the seminar, several participants presented
                  their current research, and ongoing work and open problems were
                  discussed. Abstracts of the presentations given during the seminar
                  as well as abstracts of seminar results and ideas are put together
                  in this proceedings. The first section describes the seminar topics and
                  goals in general. Links to extended abstracts or full papers are
                  provided, if available.",
  note =         "http://drops.dagstuhl.de/portals/06051",
}

%-------------Publications-of-Marcus-Hutter-2005--------------%

@article{Hutter:05mdl2px,
  author    = {Jan Poland and Marcus Hutter},
  title     = {Asymptotics of Discrete {MDL} for Online Prediction},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {51},
  number    = {11},
  pages     = {3780--3795},
  _month    = nov,
  year      = {2005},
  doi       = {10.1109/TIT.2005.856956},
  issn      = {0018-9448},
  bibtex    = {http://www.hutter1.net/official/bib.htm#mdl2px},
  url       = {http://arxiv.org/abs/cs.IT/0506022},
  pdf       = {http://www.hutter1.net/ai/mdl2px.pdf},
  ps        = {http://www.hutter1.net/ai/mdl2px.ps},
  latex     = {http://www.hutter1.net/ai/mdl2px.zip},
  slides    = {http://www.hutter1.net/ai/smdl2p.pdf},
  slidesppt = {http://www.hutter1.net/ai/smdl2p.ppt},
  project   = {http://www.hutter1.net/official/projects.htm#mdl},
  keywords  = {Algorithmic Information Theory, Classification, Consistency,
               Discrete Model Class, Loss Bounds, Minimum Description Length,
               Regression, Sequence Prediction, Stabilization, Universal Induction.},
  abstract  = {Minimum Description Length (MDL) is an important principle for induction and
               prediction, with strong relations to optimal Bayesian learning. This paper
               deals with learning non-i.i.d. processes by means of two-part MDL, where the
               underlying model class is countable. We consider the online learning framework,
               i.e. observations come in one by one, and the predictor is allowed to update
               his state of mind after each time step. We identify two ways of predicting by
               MDL for this setup, namely a static and a dynamic one. (A third variant,
               hybrid MDL, will turn out inferior.) We will prove that under the only
               assumption that the data is generated by a distribution contained in the model
               class, the MDL predictions converge to the true values almost surely. This is
               accomplished by proving finite bounds on the quadratic, the Hellinger, and the
               Kullback-Leibler loss of the MDL learner, which are however exponentially worse
               than for Bayesian prediction. We demonstrate that these bounds are sharp, even
               for model classes containing only Bernoulli distributions. We show how these
               bounds imply regret bounds for arbitrary loss functions. Our results apply to a
               wide range of setups, namely sequence prediction, pattern classification,
               regression, and universal induction in the sense of Algorithmic Information
               Theory among others.},
}
@Article{Hutter:05tree,
  author =       "Marco Zaffalon and Marcus Hutter",
  title =        "Robust Inference of Trees",
  journal =      "Annals of Mathematics and Artificial Intelligence",
  volume =       "45",
  pages =        "215--239",
  _month =        oct,
  year =         "2005",
  _publisher =   "Springer",
  bibtex =       "http://www.hutter1.net/official/bib.htm#tree",
  url =          "http://arxiv.org/abs/cs.LG/0511087",
  pdf =          "http://www.hutter1.net/ai/tree.pdf",
  ps =           "http://www.hutter1.net/ai/tree.ps",
  latex =        "http://www.hutter1.net/ai/tree.zip",
  project =      "http://www.hutter1.net/official/projects.htm#robust",
  doi =          "10.1007/s10472-005-9007-9",
  issn =         "1012-2443",
  categories =   "I.2.   [Artificial Intelligence]",
  keywords =     "Robust inference, spanning trees, intervals,
                  dependence, graphical models, mutual information, imprecise
                  probabilities, imprecise Dirichlet model.",
  abstract =     "This paper is concerned with the reliable inference of optimal
                  tree-approximations to the dependency structure of an unknown
                  distribution generating data. The traditional approach to the
                  problem measures the dependency strength between random variables
                  by the index called mutual information. In this paper reliability
                  is achieved by Walley's imprecise Dirichlet model, which
                  generalizes Bayesian learning with Dirichlet priors. Adopting the
                  imprecise Dirichlet model results in posterior interval
                  expectation for mutual information, and in a set of plausible
                  trees consistent with the data. Reliable inference about the
                  actual tree is achieved by focusing on the substructure common to
                  all the plausible trees. We develop an exact algorithm that infers
                  the substructure in time $O(m^4)$, $m$ being the number of random
                  variables. The new algorithm is applied to a set of data sampled
                  from a known distribution. The method is shown to reliably infer
                  edges of the actual tree even when the data are very scarce,
                  unlike the traditional approach. Finally, we provide lower and
                  upper credibility limits for mutual information under the
                  imprecise Dirichlet model. These enable the previous developments
                  to be extended to a full inferential method for trees.",
}
@InProceedings{Hutter:05postbnd,
  author =       "Alexey Chernov and Marcus Hutter",
  title =        "Monotone Conditional Complexity Bounds on Future Prediction Errors",
  booktitle =    "Proc. 16th International Conf. on Algorithmic Learning Theory ({ALT'05})",
  address =      "Singapore",
  series =       "LNAI",
  volume =       "3734",
  _editor =       "Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita",
  publisher =    "Springer",
  pages =        "414--428",
  _month =        oct,
  year =         "2005",
  bibtex =       "http://www.hutter1.net/official/bib.htm#postbnd",
  url =          "http://arxiv.org/abs/cs.LG/0507041",
  pdf =          "http://www.hutter1.net/ai/postbnd.pdf",
  ps =           "http://www.hutter1.net/ai/postbnd.ps",
  latex =        "http://www.hutter1.net/ai/postbnd.tex",
  slides =       "http://www.hutter1.net/ai/spostbnd.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#ait",
  doi =          "10.1007/11564089_32",
  issn =         "0302-9743",
  isbn =         "3-540-29242-X",
  keywords =     "Kolmogorov complexity, posterior bounds,
                  online sequential prediction, Solomonoff prior,
                  monotone conditional complexity, total error,
                  future loss, randomness deficiency.",
  abstract =     "We bound the future loss when predicting any (computably)
                  stochastic sequence online. Solomonoff finitely bounded the total
                  deviation of his universal predictor M from the true
                  distribution m by the algorithmic complexity of m. Here we
                  assume we are at a time $t>1$ and already observed $x=x_1...x_t$.
                  We bound the future prediction performance on $x_{t+1}x_{t+2}...$
                  by a new variant of algorithmic complexity of m given x,
                  plus the complexity of the randomness deficiency of x. The new
                  complexity is monotone in its condition in the sense that this
                  complexity can only decrease if the condition is prolonged. We
                  also briefly discuss potential generalizations to Bayesian model
                  classes and to classification problems.",
  support =      "SNF grant 200020-100259 and 2100-67712",
  znote =        "Acceptance rate: 30/98 = 30\%",
}
@inproceedings{Hutter:05actexp2,
  author    = {Jan Poland and Marcus Hutter},
  title     = {Defensive Universal Learning with Experts},
  booktitle = {Proc. 16th International Conf. on Algorithmic Learning Theory ({ALT'05})},
  _editor   = {Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita},
  series    = {LNAI},
  volume    = {3734},
  pages     = {356--370},
  publisher = {Springer},
  address   = {Singapore},
  _month    = oct,
  year      = {2005},
  doi       = {10.1007/11564089_28},
  issn      = {0302-9743},
  isbn      = {3-540-29242-X},
  bibtex    = {http://www.hutter1.net/official/bib.htm#actexp2},
  url       = {http://arxiv.org/abs/cs.LG/0507044},
  pdf       = {http://www.hutter1.net/ai/actexp2.pdf},
  ps        = {http://www.hutter1.net/ai/actexp2.ps},
  latex     = {http://www.hutter1.net/ai/actexp2.tex},
  slides    = {http://www.hutter1.net/ai/sactexp.pdf},
  slidesppt = {http://www.hutter1.net/ai/sactexp.ppt},
  project   = {http://www.hutter1.net/official/projects.htm#expert},
  keywords  = {Prediction with expert advice, responsive
               environments, partial observation game, bandits, universal
               learning, asymptotic optimality.},
  abstract  = {This paper shows how universal learning can be achieved with
               expert advice. To this aim, we specify an experts algorithm with
               the following characteristics: (a) it uses only feedback from the
               actions actually chosen (bandit setup), (b) it can be applied with
               countably infinite expert classes, and (c) it copes with losses
               that may grow in time appropriately slowly. We prove loss bounds
               against an adaptive adversary. From this, we obtain a master
               algorithm for ``reactive'' experts problems, which means that the
               master's actions may influence the behavior of the adversary. Our
               algorithm can significantly outperform standard experts algorithms
               on such problems. Finally, we combine it with a universal expert
               class. The resulting universal learner performs -- in a certain
               sense -- almost as well as any computable strategy, for any online
               decision problem. We also specify the (worst-case) convergence
               speed, which is very slow.},
  znote     = {Acceptance rate: 30/98 = 30\%},
}
@InProceedings{Hutter:05iors,
  author =       "Shane Legg and Marcus Hutter",
  title =        "A Universal Measure of Intelligence for Artificial Agents",
  booktitle =    "Proc. 19th International Joint Conf. on Artificial Intelligence ({IJCAI-2005})",
  pages =        "1509--1510",
  _editor =       "L. P. Kaelbling and A. Saffiotti",
  _publisher =    "Professional Book Center",
  address =      "Edinburgh, Scotland",
  _month =        aug,
  year =         "2005",
  bibtex =       "http://www.hutter1.net/official/bib.htm#iors",
  http =         "http://dl.acm.org/citation.cfm?id=1642293.1642533",
  pdf =          "http://www.hutter1.net/ai/iors.pdf",
  ps =           "http://www.hutter1.net/ai/iors.ps",
  slides =       "http://www.hutter1.net/ai/siors.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#uai",
  press =        "http://www.hutter1.net/official/press.htm#ior",
  isbn_print =   "0-938075-93-4",
  isbn_cd =      "0-938075-94-2",
  support =      "SNF grant 2100-67712",
  znote =        "Acceptance rate: 112/453 = 25\%",
}
@InProceedings{Hutter:05fuds,
  author =       "Shane Legg and Marcus Hutter",
  title =        "Fitness Uniform Deletion for Robust Optimization",
  booktitle =    "Proc. Genetic and Evolutionary Computation Conference ({GECCO'05})",
  address =      "Washington, DC, USA",
  editor =       "H.-G. Beyer and others",
  publisher =    "ACM SigEvo",
  _month =        jun,
  year =         "2005",
  pages =        "1271--1278",
  bibtex =       "http://www.hutter1.net/official/bib.htm#fuds",
  http =         "http://www.hutter1.net/ai/fuds.htm",
  url =          "http://arxiv.org/abs/cs.NE/0504035",
  pdf =          "http://www.hutter1.net/ai/fuds.pdf",
  ps =           "http://www.hutter1.net/ai/fuds.ps",
  latex =        "http://www.hutter1.net/ai/fuds.zip",
  slides =       "http://www.hutter1.net/ai/sfuds.pdf",
  slidesppt =    "http://www.hutter1.net/ai/sfuds.ppt",
  project =      "http://www.hutter1.net/official/projects.htm#optimize",
  press =        "http://www.hutter1.net/official/press.htm#fuss",
  code1 =        "http://www.hutter1.net/ai/fussdd.cpp",
  code2 =        "http://www.hutter1.net/ai/fussdd.h",
  code3 =        "http://www.hutter1.net/ai/fusstsp.cpp",
  code4 =        "http://www.hutter1.net/ai/fusstsp.h",
  doi =          "10.1145/1068009.1068216",
  isbn =         "1-59593-010-8",
  keywords =     "Evolutionary algorithm, deletion schemes, fitness evaluation,
                  optimization, fitness landscapes, (self)adaptation.",
  abstract =     "A commonly experienced problem with population based optimisation
                  methods is the gradual decline in population diversity that tends
                  to occur over time.  This can slow a system's progress or even
                  halt it completely if the population converges on a local optimum
                  from which it cannot escape.  In this paper we present the Fitness
                  Uniform Deletion Scheme (FUDS), a simple but somewhat
                  unconventional approach to this problem.  Under FUDS the deletion
                  operation is modified to only delete those individuals which are
                  ``common'' in the sense that there exist many other individuals of
                  similar fitness in the population.  This makes it impossible for
                  the population to collapse to a collection of highly related
                  individuals with similar fitness. Our experimental results on a
                  range of optimisation problems confirm this, in particular for
                  deceptive optimisation problems the performance is significantly
                  more robust to variation in the selection intensity.",
  znote =        "Acceptance rate: 253/549 = 46\%",
}
@Article{Hutter:05expertx,
  author =       "Marcus Hutter and Jan Poland",
  title =        "Adaptive Online Prediction by Following the Perturbed Leader",
  volume =       "6",
  _month =        apr,
  year =         "2005",
  pages =        "639--660",
  journal =      "Journal of Machine Learning Research",
  publisher =    "Microtome",
  bibtex =       "http://www.hutter1.net/official/bib.htm#expertx",
  http =         "http://www.hutter1.net/ai/expertx.htm",
  url =          "http://arxiv.org/abs/cs.AI/0504078",
  url2 =         "http://www.jmlr.org/papers/v6/hutter05a.html",
  pdf =          "http://www.hutter1.net/ai/expertx.pdf",
  ps =           "http://www.hutter1.net/ai/expertx.ps",
  latex =        "http://www.hutter1.net/ai/expertx.tex",
  slides =       "http://www.hutter1.net/ai/sexpert.pdf",
  project =      "http://www.hutter1.net/official/projects.htm#expert",
  issn =         "1532-4435",
  keywords =     "Prediction with Expert Advice, Follow the Perturbed Leader,
                  general weights, adaptive learning rate,
                  adaptive adversary, hierarchy of experts,
                  expected and high probability bounds, general alphabet and loss,
                  online sequential prediction.",
  abstract =     "When applying aggregating strategies to Prediction with Expert
                  Advice, the learning rate must be adaptively tuned. The natural
                  choice of sqrt(complexity/current loss) renders the analysis of
                  Weighted Majority derivatives quite complicated. In particular,
                  for arbitrary weights there have been no results proven so far.
                  The analysis of the alternative ``Follow the Perturbed Leader''
                  (FPL) algorithm from Kalai \& Vempala (2003) (based on Hannan's
                  algorithm) is easier. We derive loss bounds for adaptive learning
                  rate and both finite expert classes with uniform weights and
                  countable expert classes with arbitrary weights. For the former
                  setup, our loss bounds match the best known results so far, while
                  for the latter our results are new.",
}
@Article{Hutter:05mifs,
  author =       "Marcus Hutter and Marco Zaffalon",
  title =        "Distribution of Mutual Information from Complete and Incomplete Data",
  journal =      "Computational Statistics \& Data Analysis",
  volume =       "48",
  number =       "3",
  pages =        "633--657",
  _month =        mar,
  year =         "2005",
  publisher =    "Elsevier Science",
  bibtex =       "http://www.hutter1.net/official/bib.htm#mifs",
  http =         "http://www.hutter1.net/ai/mifs.htm",
  url =          "http://arxiv.org/abs/cs.LG/0403025",
  pdf =          "http://www.hutter1.net/ai/mifs.pdf",
  ps =           "http://www.hutter1.net/ai/mifs.ps",
  latex =        "http://www.hutter1.net/ai/mifs.zip",
  slides =       "http://www.hutter1.net/ai/smimiss.pdf",
  slidesppt =    "http://www.hutter1.net/ai/smimiss.ppt",
  project =      "http://www.hutter1.net/official/projects.htm#robust",
  code =         "http://www.hutter1.net/ai/mifs.cpp",
  doi =          "10.1016/j.csda.2004.03.010",
  issn =         "0167-9473",
  categories =   "I.2.   [Artificial Intelligence]",
  keywords =     "Mutual information, cross entropy, Dirichlet distribution, second
                  order distribution, expectation and variance of mutual
                  information, feature selection, filters, naive Bayes classifier,
                  Bayesian statistics.",
  abstract =     "Mutual information is widely used, in a descriptive way, to measure the
                  stochastic dependence of categorical random variables. In order to address
                  questions such as the reliability of the descriptive value, one must consider
                  sample-to-population inferential approaches. This paper deals with the
                  posterior distribution of mutual information, as obtained in a Bayesian
                  framework by a second-order Dirichlet prior distribution. The exact analytical
                  expression for the mean, and analytical approximations for the variance,
                  skewness and kurtosis are derived. These approximations have a guaranteed
                  accuracy level of the order $O(1/n^3)$, where $n$ is the sample size. Leading order
                  approximations for the mean and the variance are derived in the case of
                  incomplete samples. The derived analytical expressions allow the distribution
                  of mutual information to be approximated reliably and quickly. In fact, the
                  derived expressions can be computed with the same order of complexity needed
                  for descriptive mutual information. This makes the distribution of mutual
                  information become a concrete alternative to descriptive mutual information in
                  many applications which would benefit from moving to the inductive side. Some
                  of these prospective applications are discussed, and one of them, namely
                  feature selection, is shown to perform significantly better when inductive
                  mutual information is used.",
}
@inproceedings{Hutter:05mdlreg,
  author        = {Jan Poland and Marcus Hutter},
  title         = {Strong Asymptotic Assertions for Discrete {MDL} in Regression and Classification},
  booktitle     = {Proc. 14th {D}utch-{B}elgium Conf. on Machine Learning ({Benelearn'05})},
  _editor       = {Martijn {van Otterlo} and Mannes Poel and Anton Nijholt},
  _series       = {CTIT Workshop Proceedings Series},
  _number       = {WP05-03},
  _organization = {CTIT Research Institute, University of Twente},
  address       = {Enschede},
  pages         = {67--72},
  _month        = feb,
  year          = {2005},
  issn          = {0929-0672},
  bibtex        = {http://www.hutter1.net/official/bib.htm#mdlreg},
  url           = {http://arxiv.org/abs/math.ST/0502315},
  conf          = {http://hmi.ewi.utwente.nl/conference/benelearn2005},
  pdf           = {http://www.hutter1.net/ai/mdlreg.pdf},
  ps            = {http://www.hutter1.net/ai/mdlreg.ps},
  latex         = {http://www.hutter1.net/ai/mdlreg.tex},
  slides        = {http://www.hutter1.net/ai/smdlreg.pdf},
  slidesppt     = {http://www.hutter1.net/ai/smdlreg.ppt},
  project       = {http://www.hutter1.net/official/projects.htm#mdl},
  keywords      = {Regression, Classification, Sequence Prediction,
                   Machine Learning, Minimum Description Length, Bayes Mixture,
                   Marginalization, Convergence, Discrete Model Classes.},
  abstract      = {We study the properties of the MDL (or maximum penalized
                   complexity) estimator for Regression and Classification, where the
                   underlying model class is countable. We show in particular a
                   finite bound on the Hellinger losses under the only assumption
                   that there is a ``true'' model contained in the class. This implies
                   almost sure convergence of the predictive distribution to the true
                   one at a fast rate. It corresponds to Solomonoff's central theorem
                   of universal induction, however with a bound that is exponentially
                   larger.},
}
@inproceedings{Hutter:05actexp,
  author        = {Jan Poland and Marcus Hutter},
  title         = {Master Algorithms for Active Experts Problems based on Increasing Loss Values},
  booktitle     = {Proc. 14th {D}utch-{B}elgium Conf. on Machine Learning ({Benelearn'05})},
  address       = {Enschede},
  _editor       = {Martijn {van Otterlo} and Mannes Poel and Anton Nijholt},
  pages         = {59--66},
  _month        = feb,
  year          = {2005},
  _number       = {WP05-03},
  _series       = {CTIT Workshop Proceedings Series},
  _organization = {CTIT Research Institute, University of Twente},
  bibtex        = {http://www.hutter1.net/official/bib.htm#actexp},
  url           = {http://arxiv.org/abs/cs.LG/0502067},
  conf          = {http://hmi.ewi.utwente.nl/conference/benelearn2005},
  pdf           = {http://www.hutter1.net/ai/actexp.pdf},
  ps            = {http://www.hutter1.net/ai/actexp.ps},
  latex         = {http://www.hutter1.net/ai/actexp.tex},
  slides        = {http://www.hutter1.net/ai/sactexp.pdf},
  slidesppt     = {http://www.hutter1.net/ai/sactexp.ppt},
  project       = {http://www.hutter1.net/official/projects.htm#expert},
  issn          = {0929-0672},
  keywords      = {Prediction with expert advice, responsive
    environments, partial observation game, bandits, universal
    learning, asymptotic optimality.},
  abstract      = {We specify an experts algorithm with the following
    characteristics: (a) it uses only feedback from the actions
    actually chosen (bandit setup), (b) it can be applied with
    countably infinite expert classes, and (c) it copes with
    losses that may grow in time appropriately slowly. We
    prove loss bounds against an adaptive adversary. From this, we
    obtain master algorithms for ``active experts problems'', which
    means that the master's actions may influence the behavior of
    the adversary. Our algorithm can significantly outperform
    standard experts algorithms on such problems. Finally, we
    combine it with a universal expert class. This results in a
    (computationally infeasible) universal master algorithm
    which performs - in a certain sense - almost as well as any
    computable strategy, for any online problem.},
}
@Slides{Hutter:05predict,
  author        = {Marcus Hutter},
  title         = {How to predict with {Bayes}, {MDL}, and {Experts}},
  _month        = jan,
  year          = {2005},
  note          = {Presented at the Machine Learning Summer School (MLSS)},
  http          = {http://canberra05.mlss.cc/},
  url           = {http://www.idsia.ch/~marcus/ai/predict.htm},
  slides        = {http://www.idsia.ch/~marcus/ai/spredict.pdf},
}
@inproceedings{Hutter:05bayestree,
  author        = {Marcus Hutter},
  title         = {Fast Non-Parametric {B}ayesian Inference on Infinite Trees},
  booktitle     = {Proc. 10th International Conf. on Artificial Intelligence and Statistics ({AISTATS-2005})},
  _address      = {Barbados},
  _editor       = {R. G. Cowell and Z. Ghahramani},
  publisher     = {Society for Artificial Intelligence and Statistics},
  pages         = {144--151},
  _month        = jan,
  year          = {2005},
  bibtex        = {http://www.hutter1.net/official/bib.htm#bayestree},
  http          = {http://www.hutter1.net/ai/bayestree.htm},
  url           = {http://arxiv.org/abs/math.PR/0411515},
  pdf           = {http://www.hutter1.net/ai/bayestree.pdf},
  ps            = {http://www.hutter1.net/ai/bayestree.ps},
  latex         = {http://www.hutter1.net/ai/bayestree.zip},
  slides        = {http://www.hutter1.net/ai/sbayestree.pdf},
  project       = {http://www.hutter1.net/official/projects.htm#bayes},
  code          = {http://www.hutter1.net/ai/bayestree.c},
  isbn          = {0-9727358-1-X},
  keywords      = {Bayesian density estimation, exact linear time algorithm,
    non-parametric inference, adaptive infinite tree, Polya tree,
    scale invariance.},
  abstract      = {Given i.i.d. data from an unknown distribution,
    we consider the problem of predicting future items.
    An adaptive way to estimate the probability density
    is to recursively subdivide the domain to an appropriate
    data-dependent granularity. A Bayesian would assign a
    data-independent prior probability to ``subdivide'', which leads
    to a prior over infinite(ly many) trees. We derive an exact, fast,
    and simple inference algorithm for such a prior, for the data
    evidence, the predictive distribution, the effective model
    dimension, and other quantities.},
  znote         = {Acceptance rate: 57/150 = 38\%},
}

%-------------Publications-of-Marcus-Hutter-2004--------------%

@techreport{Hutter:04mdp,
  author        = {Shane Legg and Marcus Hutter},
  number        = {IDSIA-21-04},
  title         = {Ergodic {MDP}s Admit Self-Optimising Policies},
  year          = {2004},
  institution   = {{IDSIA}},
}
@TechReport{Hutter:04env,
  author =       "Shane Legg and Marcus Hutter",
  number =       "IDSIA-20-04",
  title =        "A Taxonomy for Abstract Environments",
  year =         "2004",
  institution =  "{IDSIA}",
}
@book{Hutter:04uaibook,
  author        = {Marcus Hutter},
  title         = {Universal Artificial Intelligence:
    Sequential Decisions based on Algorithmic Probability},
  _series       = {EATCS},
  publisher     = {Springer},
  address       = {Berlin},
  year          = {2005},
  isbn          = {3-540-22139-5},
  isbn-online   = {978-3-540-26877-2},
  doi           = {10.1007/b138233},
  note          = {300 pages, http://www.hutter1.net/ai/uaibook.htm},
  url           = {http://www.hutter1.net/ai/uaibook.htm},
  review1       = {AIJ: http://dx.doi.org/10.1016/j.artint.2006.10.005},
  review2       = {ACM: http://www.reviews.com/review/review_review.cfm?review_id=131175},
  reviews       = {Amazon: http://www.amazon.com/exec/obidos/redirect?tag=homepageofm0a-20&path=ASIN/3540221395},
  slides        = {http://www.hutter1.net/ai/suaibook.pdf},
  video         = {http://vimeo.com/14888930},
  keywords      = {Artificial intelligence; algorithmic probability;
    sequential decision theory; Solomonoff induction;
    Kolmogorov complexity; Bayes mixture distributions;
    reinforcement learning; universal sequence prediction;
    tight loss and error bounds; Levin search;
    strategic games; function minimization; supervised learning.},
  abstract      = {This book presents sequential decision theory from a
    novel algorithmic information theory perspective. While the former
    theory is suited for active agents in known environments, the
    latter is suited for passive prediction of unknown environments.
    The book introduces these two well-known but very different ideas
    and removes the limitations by unifying them to one parameter-free
    theory of an optimal reinforcement learning agent interacting with
    an arbitrary unknown world. Most if not all AI problems can easily
    be formulated within this theory, which reduces the conceptual
    problems to pure computational ones. Considered problem classes
    include sequence prediction, strategic games, function
    minimization, reinforcement and supervised learning. Formal
    definitions of intelligence order relations, the horizon problem
    and relations to other approaches to AI are discussed. One
    intention of this book is to excite a broader AI audience about
    abstract algorithmic information theory concepts, and conversely
    to inform theorists about exciting applications to AI.},
  support       = {SNF grant 2000-61847},
}
@inproceedings{Hutter:04mlconvx,
  author        = {Marcus Hutter and Andrej A. Muchnik},
  title         = {Universal Convergence of Semimeasures on Individual Random Sequences},
  booktitle     = {Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})},
  address       = {Padova, Italy},
  series        = {LNAI},
  volume        = {3244},
  _editor       = {S. Ben-David and J. Case and A. Maruoka},
  publisher     = {Springer},
  pages         = {234--248},
  year          = {2004},
  doi           = {10.1007/978-3-540-30215-5_19},
  issn          = {0302-9743},
  isbn          = {3-540-23356-3},
  http          = {http://www.hutter1.net/ai/mlconvx.htm},
  url           = {http://arxiv.org/abs/cs.LG/0407057},
  keywords      = {Sequence prediction; Algorithmic Information Theory;
    universal enumerable semimeasure; mixture distributions;
    posterior convergence; Martin-L{\"o}f randomness;
    quasimeasures.},
  abstract      = {Solomonoff's central result on induction is that the posterior of
    a universal semimeasure M converges rapidly and with probability
    1 to the true sequence generating posterior mu, if the latter is
    computable. Hence, M is eligible as a universal sequence predictor
    in case of unknown mu. Despite some nearby results and proofs in
    the literature, the stronger result of convergence for all
    (Martin-Loef) random sequences remained open. Such a convergence
    result would be particularly interesting and natural, since
    randomness can be defined in terms of M itself. We show that there
    are universal semimeasures M which do not converge for all random
    sequences, i.e. we give a partial negative answer to the open
    problem. We also provide a positive answer for some non-universal
    semimeasures. We define the incomputable measure D as a mixture
    over all computable measures and the enumerable semimeasure W as a
    mixture over all enumerable nearly-measures. We show that W
    converges to D and D to mu on all random sequences. The Hellinger
    distance measuring closeness of two distributions plays
    a central role.},
  znote         = {Acceptance rate: 29/91 = 32\%},
}
@inproceedings{Hutter:04expert,
  author        = {Marcus Hutter and Jan Poland},
  title         = {Prediction with Expert Advice by Following the Perturbed Leader for General Weights},
  booktitle     = {Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})},
  address       = {Padova, Italy},
  series        = {LNAI},
  volume        = {3244},
  _editor       = {S. Ben-David and J. Case and A. Maruoka},
  publisher     = {Springer},
  pages         = {279--293},
  year          = {2004},
  doi           = {10.1007/978-3-540-30215-5_22},
  issn          = {0302-9743},
  isbn          = {3-540-23356-3},
  http          = {http://www.hutter1.net/ai/expert.htm},
  url           = {http://arxiv.org/abs/cs.LG/0405043},
  keywords      = {Prediction with Expert Advice, Follow the Perturbed Leader,
    general weights, adaptive learning rate,
    hierarchy of experts, expected and high probability bounds,
    general alphabet and loss, online sequential prediction.},
  abstract      = {When applying aggregating strategies to Prediction with Expert
    Advice, the learning rate must be adaptively tuned. The natural
    choice of sqrt(complexity/current loss) renders the
    analysis of Weighted Majority derivatives quite complicated. In
    particular, for arbitrary weights there have been no results
    proven so far. The analysis of the alternative ``Follow the
    Perturbed Leader'' (FPL) algorithm from Kalai \& Vempala (2003) (based on
    Hannan's algorithm) is easier. We derive loss bounds for adaptive
    learning rate and both finite expert classes with uniform weights
    and countable expert classes with arbitrary weights. For the
    former setup, our loss bounds match the best known results so far,
    while for the latter our results are new.},
  znote         = {Acceptance rate: 29/91 = 32\%},
}
@inproceedings{Hutter:04mdlspeed,
  author        = {Jan Poland and Marcus Hutter},
  title         = {On the convergence speed of {MDL} predictions for {B}ernoulli sequences},
  booktitle     = {Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})},
  address       = {Padova, Italy},
  series        = {LNAI},
  volume        = {3244},
  _editor       = {S. Ben-David and J. Case and A. Maruoka},
  publisher     = {Springer},
  pages         = {294--308},
  year          = {2004},
  doi           = {10.1007/978-3-540-30215-5_23},
  issn          = {0302-9743},
  isbn          = {3-540-23356-3},
  http          = {http://www.hutter1.net/ai/mdlspeed.htm},
  url           = {http://arxiv.org/abs/cs.LG/0407039},
  keywords      = {MDL, Minimum Description Length, Convergence Rate,
    Prediction, Bernoulli, Discrete Model Class.},
  abstract      = {We consider the Minimum Description Length principle for online
    sequence prediction. If the underlying model class is discrete,
    then the total expected square loss is a particularly interesting
    performance measure: (a) this quantity is bounded, implying
    convergence with probability one, and (b) it additionally
    specifies a `rate of convergence'. Generally, for MDL only
    exponential loss bounds hold, as opposed to the linear bounds for
    a Bayes mixture. We show that this is even the case if the model
    class contains only Bernoulli distributions. We derive a new upper
    bound on the prediction error for countable Bernoulli classes.
    This implies a small bound (comparable to the one for Bayes
    mixtures) for certain important model classes. The results apply
    to many Machine Learning tasks including classification and
    hypothesis testing. We provide arguments that our theorems
    generalize to countable classes of i.i.d. models.},
  znote         = {Acceptance rate: 29/91 = 32\%},
}
@techreport{Hutter:04bayespea,
  author        = {Marcus Hutter},
  title         = {Online Prediction -- {B}ayes versus Experts},
  institution   = {http://www.idsia.ch/$_{^\sim}$marcus/ai/bayespea.htm},
  _month        = jul,
  pages         = {4 pages},
  year          = {2004},
  note          = {Presented at the {\em EU PASCAL Workshop on
    Learning Theoretic and Bayesian Inductive Principles (LTBIP-2004)}},
  url           = {http://www.hutter1.net/ai/bayespea.htm},
  ps            = {http://www.hutter1.net/ai/bayespea.ps},
  pdf           = {http://www.hutter1.net/ai/bayespea.pdf},
  slides        = {http://www.hutter1.net/ai/sbayespea.pdf},
  keywords      = {Bayesian sequence prediction;
    Prediction with Expert Advice;
    general weights, alphabet and loss.},
  abstract      = {We derive a very general regret bound in the framework of
    prediction with expert advice, which challenges the best known
    regret bound for Bayesian sequence prediction. Both bounds of the
    form $\sqrt{\mbox{Loss}\times\mbox{complexity}}$ hold for any
    bounded loss-function, any prediction and observation spaces,
    arbitrary expert/environment classes and weights, and unknown
    sequence length.},
}
@inproceedings{Hutter:04mdl2p,
  author        = {Jan Poland and Marcus Hutter},
  title         = {Convergence of Discrete {MDL} for Sequential Prediction},
  booktitle     = {Proc. 17th Annual Conf. on Learning Theory ({COLT'04})},
  address       = {Banff, Canada},
  series        = {LNAI},
  volume        = {3120},
  _editor       = {J. Shawe-Taylor and Y. Singer},
  publisher     = {Springer},
  pages         = {300--314},
  year          = {2004},
  doi           = {10.1007/978-3-540-27819-1_21},
  isbn          = {3-540-22282-0},
  http          = {http://www.hutter1.net/ai/mdl2p.htm},
  url           = {http://arxiv.org/abs/cs.LG/0404057},
  keywords      = {Minimum Description Length, Sequence Prediction,
    Convergence, Discrete Model Classes, Universal Induction,
    Stabilization, Algorithmic Information Theory.},
  abstract      = {We study the properties of the Minimum Description Length principle for
    sequence prediction, considering a two-part MDL estimator which is chosen from
    a countable class of models. This applies in particular to the important case
    of universal sequence prediction, where the model class corresponds to all
    algorithms for some fixed universal Turing machine (this correspondence is by
    enumerable semimeasures, hence the resulting models are stochastic). We prove
    convergence theorems similar to Solomonoff's theorem of universal induction,
    which also holds for general Bayes mixtures. The bound characterizing the
    convergence speed for MDL predictions is exponentially larger as compared to
    Bayes mixtures. We observe that there are at least three different ways of
    using MDL for prediction. One of these has worse prediction properties, for
    which predictions only converge if the MDL estimator stabilizes. We establish
    sufficient conditions for this to occur. Finally, some immediate consequences
    for complexity relations and randomness criteria are proven.},
  znote         = {Acceptance rate: 44/107 = 41\%},
}
@inproceedings{Hutter:04fussexp,
  author        = {Shane Legg and Marcus Hutter and Akshat Kumar},
  title         = {Tournament versus Fitness Uniform Selection},
  booktitle     = {Proc. 2004 Congress on Evolutionary Computation ({CEC'04})},
  address       = {Portland, OR, USA},
  xeditor       = {??},
  publisher     = {IEEE},
  isbn          = {0-7803-8515-2},
  _month        = jun,
  year          = {2004},
  pages         = {2144--2151},
  keywords      = {Selection schemes, fitness evaluation, optimization,
    fitness landscapes, basic working principles of evolutionary computations,
    (self)adaptation, evolutionary algorithm,
    deceptive \& multimodal optimization problems.},
  http          = {http://www.hutter1.net/ai/fussexp.htm},
  url           = {http://arxiv.org/abs/cs.LG/0403038},
  doi           = {10.1109/CEC.2004.1331162},
  press         = {http://www.trnmag.com/Stories/032801/Diversity_trumps_fitness_032801.html},
  abstract      = {In evolutionary algorithms a critical parameter that must be tuned is
    that of selection pressure.  If it is set too low then the rate of
    convergence towards the optimum is likely to be slow.  Alternatively
    if the selection pressure is set too high the system is likely to
    become stuck in a local optimum due to a loss of diversity in the
    population. The recent Fitness Uniform Selection Scheme (FUSS) is a
    conceptually simple but somewhat radical approach to addressing this
    problem --- rather than biasing the selection towards higher fitness,
    FUSS biases selection towards sparsely populated fitness levels. In
    this paper we compare the relative performance of FUSS with the well
    known tournament selection scheme on a range of problems.},
  znote         = {Acceptance rate: 300/460 = 65\%},
}

%-------------Publications-of-Marcus-Hutter-2003--------------%

@phdthesis{Hutter:03habil,
  author        = {Marcus Hutter},
  school        = {Fakult{\"a}t f{\"u}r Informatik},
  address       = {TU M{\"u}nchen},
  title         = {Optimal Sequential Decisions based on Algorithmic Probability},
  year          = {2003},
  pages         = {1--288},
  http          = {http://www.hutter1.net/ai/habil.htm},
  url           = {http://arxiv.org/abs/cs.AI/0306091},
  keywords      = {Artificial intelligence; algorithmic probability;
    sequential decision theory; Solomonoff induction;
    Kolmogorov complexity; Bayes-mixture distributions;
    reinforcement learning; universal sequence prediction;
    tight loss and error bounds; Levin search;
    strategic games; function minimization;
    supervised learning.},
  abstract      = {Decision theory formally solves the problem of rational agents in
    uncertain worlds if the true environmental prior probability
    distribution is known. Solomonoff's theory of universal induction
    formally solves the problem of sequence prediction for unknown
    prior distribution. In this \thesis\ both ideas are unified to one
    parameter-free theory for universal Artificial Intelligence. We
    give strong arguments that the resulting AIXI model is the most
    intelligent unbiased agent possible. We outline for a number of
    problem classes, including sequence prediction, strategic games,
    function minimization, reinforcement and supervised learning, how
    the AIXI model can formally solve them. The major drawback of the
    AIXI model is that it is uncomputable. To overcome this problem,
    we construct a modified algorithm AIXI$tl$, which is still
    effectively more intelligent than any other time $t$ and length $l$
    bounded agent. The computation time of AIXI$tl$ is of the order
    $t\cdot 2^l$. The discussion includes formal definitions of
    intelligence order relations, the horizon problem and relations of
    the AIXI theory to other AI approaches.},
}
@inproceedings{Hutter:03unimdl,
  author        = {Marcus Hutter},
  title         = {Sequence Prediction based on Monotone Complexity},
  booktitle     = {Proc. 16th Annual Conf. on Learning Theory ({COLT'03})},
  address       = {Washington, DC, USA},
  series        = {LNAI},
  volume        = {2777},
  _editor       = {B. Sch{\"o}lkopf and M. K. Warmuth},
  publisher     = {Springer},
  pages         = {506--521},
  year          = {2003},
  isbn          = {3-540-40720-0},
  doi           = {10.1007/978-3-540-45167-9_37},
  http          = {http://www.hutter1.net/ai/unimdl.htm},
  url           = {http://arxiv.org/abs/cs.AI/0306036},
  keywords      = {Sequence prediction; Algorithmic Information Theory;
    Solomonoff's prior; Monotone Kolmogorov Complexity;
    Minimal Description Length; Convergence;
    Self-Optimizingness},
  abstract      = {This paper studies sequence prediction based on the
    monotone Kolmogorov complexity $\Km=-\lb m$, i.e.\ based on
    universal MDL. $m$ is extremely close to Solomonoff's prior $M$,
    the latter being an excellent predictor in deterministic as well
    as probabilistic environments, where performance is measured in
    terms of convergence of posteriors or losses. Despite this
    closeness to $M$, it is difficult to assess the prediction quality
    of $m$, since little is known about the closeness of their
    posteriors, which are the important quantities for prediction.
    We show that for deterministic computable environments, the
    ``posterior'' and losses of $m$ converge, but rapid convergence
    could only be shown on-sequence; the off-sequence behavior is
    unclear. In probabilistic environments, neither the posterior nor
    the losses converge, in general.},
  znote         = {Acceptance rate: 49/92 = 53\%},
}
@InProceedings{Hutter:03unipriors,
  author =       "Marcus Hutter",
  title =        "On the Existence and Convergence of Computable Universal Priors",
  booktitle =    "Proc. 14th International Conf. on Algorithmic Learning Theory ({ALT'03})",
  address =      "Sapporo, Japan",
  _editor =       "Ricard Gavald{\'a} and Klaus P. Jantke and Eiji Takimoto",
  series =       "LNAI",
  volume =       "2842",
  publisher =    "Springer",
  pages =        "298--312",
  _month =        sep,
  year =         "2003",
  ISSN =         "0302-9743",
  isbn =         "3-540-20291-9",
  doi =          "10.1007/978-3-540-39624-6_24",
  http =         "http://www.hutter1.net/ai/uniprior.htm",
  url =          "http://arxiv.org/abs/cs.LG/0305052",
  keywords =     "Sequence prediction; Algorithmic Information Theory;
                  Solomonoff's prior; universal probability;
                  mixture distributions; posterior convergence;
                  computability concepts; Martin-L{\"o}f randomness.",
  abstract =     "Solomonoff unified Occam's razor and Epicurus' principle
                  of multiple explanations to one elegant, formal, universal theory
                  of inductive inference, which initiated the field of algorithmic
                  information theory. His central result is that the posterior of
                  his universal semimeasure $M$ converges rapidly to the true
                  sequence generating posterior $\mu$, if the latter is computable.
                  Hence, $M$ is eligible as a universal predictor in case of unknown
                  $\mu$. We investigate the existence, computability and convergence of
                  universal (semi)measures for a hierarchy of computability classes:
                  finitely computable, estimable, (co)enumerable, and approximable.
                  For instance, $\MM(x)$ is known to be enumerable, but not finitely
                  computable, and to dominate all enumerable semimeasures.
                  We define seven classes of (semi)measures based on these four
                  computability concepts. Each class may or may not contain a
                  (semi)measure which dominates all elements of another class. The
                  analysis of these 49 cases can be reduced to four basic cases, two
                  of them being new. We present proofs for discrete and continuous
                  semimeasures.
                  We also investigate more closely the type of convergence, possibly
                  implied by universality (in difference and in ratio, with probability
                  1, in mean sum, and for Martin-L{\"o}f random sequences).",
  znote =        "Acceptance rate: 19/37 = 51\%?",
}
@inproceedings{Hutter:03mlconv,
  author        = {Marcus Hutter},
  title         = {An Open Problem Regarding the Convergence
    of Universal A Priori Probability},
  booktitle     = {Proc. 16th Annual Conf. on Learning Theory ({COLT'03})},
  address       = {Washington, DC, USA},
  series        = {LNAI},
  volume        = {2777},
  _editor       = {B. Sch{\"o}lkopf and M. K. Warmuth},
  publisher     = {Springer},
  pages         = {738--740},
  year          = {2003},
  isbn          = {3-540-40720-0},
  doi           = {10.1007/978-3-540-45167-9_58},
  url           = {http://www.hutter1.net/ai/mlconv.htm},
  keywords      = {Sequence prediction; Algorithmic Information Theory;
    Solomonoff's prior; universal probability;
    posterior convergence; Martin-L{\"o}f randomness.},
  abstract      = {Is the textbook result that Solomonoff's universal
    posterior converges to the true posterior for all Martin-L{\"o}f
    random sequences true?},
}
@article{Hutter:03optisp,
  author        = {Marcus Hutter},
  title         = {Optimality of Universal {B}ayesian Prediction for General Loss and Alphabet},
  _month        = Nov,
  volume        = {4},
  year          = {2003},
  pages         = {971--1000},
  journal       = {Journal of Machine Learning Research},
  publisher     = {MIT Press},
  http          = {http://www.hutter1.net/ai/optisp.htm},
  url           = {http://arxiv.org/abs/cs.LG/0311014},
  url2          = {http://www.jmlr.org/papers/volume4/hutter03a/},
  url3          = {http://www.jmlr.org/papers/v4/hutter03a.html},
  issn          = {1532-4435},
  keywords      = {Bayesian sequence prediction; mixture distributions; Solomonoff
    induction; Kolmogorov complexity; learning; universal probability;
    tight loss and error bounds; Pareto-optimality; games of chance;
    classification.},
  abstract      = {Various optimality properties of universal sequence predictors
    based on Bayes-mixtures in general, and Solomonoff's prediction
    scheme in particular, will be studied. The probability of
    observing $x_t$ at time $t$, given past observations
    $x_1...x_{t-1}$ can be computed with the chain rule if the true
    generating distribution $\mu$ of the sequences $x_1x_2x_3...$ is
    known. If $\mu$ is unknown, but known to belong to a countable or
    continuous class $\M$ one can base ones prediction on the
    Bayes-mixture $\xi$ defined as a $w_\nu$-weighted sum or integral
    of distributions $\nu\in\M$. The cumulative expected loss of the
    Bayes-optimal universal prediction scheme based on $\xi$ is shown
    to be close to the loss of the Bayes-optimal, but infeasible
    prediction scheme based on $\mu$. We show that the bounds are
    tight and that no other predictor can lead to significantly
    smaller bounds. Furthermore, for various performance measures, we
    show Pareto-optimality of $\xi$ and give an Occam's razor argument
    that the choice $w_\nu\sim 2^{-K(\nu)}$ for the weights is
    optimal, where $K(\nu)$ is the length of the shortest program
    describing $\nu$. The results are applied to games of chance,
    defined as a sequence of bets, observations, and rewards. The
    prediction schemes (and bounds) are compared to the popular
    predictors based on expert advice. Extensions to infinite
    alphabets, partial, delayed and probabilistic prediction,
    classification, and more active systems are briefly discussed.},
  znote         = {Inofficial numbers: Acceptance rate: 27\%},
}
@InProceedings{Hutter:03idm,
  author =       "Marcus Hutter",
  title =        "Robust Estimators under the {I}mprecise {D}irichlet {M}odel",
  booktitle =    "Proc. 3rd International Symposium on
                  Imprecise Probabilities and Their Application ({ISIPTA-2003})",
  _editor =       "Jean-Marc Bernard and Teddy Seidenfeld and Marco Zaffalon",
  publisher =    "Carleton Scientific",
  series =       "Proceedings in Informatics",
  volume =       "18",
  address =      "Lugano, Switzerland",
  year =         "2003",
  pages =        "274--289",
  isbn =         "1-894145-17-8",
  http =         "http://www.hutter1.net/ai/idm.htm",
  url =          "http://arxiv.org/abs/math.PR/0305121",
  keywords =     "Imprecise Dirichlet Model; exact, conservative, approximate,
                  robust, confidence interval estimates; entropy; mutual information.",
  abstract =     "Walley's Imprecise Dirichlet Model (IDM) for categorical data
                  overcomes several fundamental problems which other approaches to
                  uncertainty suffer from. Yet, to be useful in practice, one needs
                  efficient ways for computing the imprecise=robust sets or
                  intervals. The main objective of this work is to derive exact,
                  conservative, and approximate, robust and credible interval
                  estimates under the IDM for a large class of statistical
                  estimators, including the entropy and mutual information.",
  znote =        "Inofficial numbers: Acceptance rate: 44/55 = 80\% ?",
}
@InProceedings{Hutter:03mimiss,
  author =       "Marcus Hutter and Marco Zaffalon",
  title =        "{Bayesian} Treatment of Incomplete Discrete Data applied
                  to Mutual Information and Feature Selection",
  _month =        sep,
  year =         "2003",
  pages =        "396--406",
  series =       "LNAI",
  volume =       "2821",
  booktitle =    "Proc. 26th German Conf. on Artificial Intelligence (KI-2003)",
  _editor =       "A. G{\"u}nter and R. Kruse and B. Neumann",
  address =      "Hamburg, Germany",
  publisher =    "Springer",
  doi =          "10.1007/978-3-540-39451-8_29",
  issn =         "0302-9743",
  isbn =         "3-540-00168-9",
  http =         "http://www.hutter1.net/ai/mimiss.htm",
  url =          "http://arxiv.org/abs/cs.LG/0306126",
  keywords =     "Incomplete data, Bayesian statistics, expectation maximization,
                  global optimization, Mutual Information, Cross Entropy, Dirichlet
                  distribution, Second order distribution, Credible intervals,
                  expectation and variance of mutual information, missing data,
                  Robust feature selection, Filter approach, naive Bayes classifier.",
  abstract =     "Given the joint chances of a pair of random variables one can
                  compute quantities of interest, like the mutual information. The
                  Bayesian treatment of unknown chances involves computing, from a
                  second order prior distribution and the data likelihood, a
                  posterior distribution of the chances. A common treatment of
                  incomplete data is to assume ignorability and determine the
                  chances by the expectation maximization (EM) algorithm. The two
                  different methods above are well established but typically
                  separated. This paper joins the two approaches in the case of
                  Dirichlet priors, and derives efficient approximations for the
                  mean, mode and the (co)variance of the chances and the mutual
                  information. Furthermore, we prove the unimodality of the
                  posterior distribution, whence the important property of
                  convergence of EM to the global maximum in the chosen framework.
                  These results are applied to the problem of selecting features for
                  incremental learning and naive Bayes classification. A fast filter
                  based on the distribution of mutual information is shown to
                  outperform the traditional filter based on empirical mutual
                  information on a number of incomplete real data sets.",
  znote =        "Acceptance rate: 42/90 = 46\%",
}
@Article{Hutter:03spupper,
  author =       "Marcus Hutter",
  title =        "Convergence and Loss Bounds for {Bayesian} Sequence Prediction",
  _month =        aug,
  volume =       "49",
  number =       "8",
  year =         "2003",
  pages =        "2061--2067",
  address =      "Manno(Lugano), Switzerland",
  journal =      "IEEE Transactions on Information Theory",
  doi =          "10.1109/TIT.2003.814488",
  issn =         "0018-9448",
  http =         "http://www.hutter1.net/ai/spupper.htm",
  url =          "http://arxiv.org/abs/cs.LG/0301014",
  keywords =     "Bayesian sequence prediction;
                  general loss function and bounds;
                  convergence; mixture distributions.",
  abstract =     "The probability of observing $x_t$ at time $t$, given past
                  observations $x_1...x_{t-1}$ can be computed with Bayes rule if
                  the true generating distribution $\mu$ of the sequences
                  $x_1x_2x_3...$ is known. If $\mu$ is unknown, but known to belong
                  to a class $M$ one can base one's prediction on the Bayes mix
                  $\xi$ defined as a weighted sum of distributions $\nu\in M$.
                  Various convergence results of the mixture posterior $\xi_t$ to
                  the true posterior $\mu_t$ are presented. In particular a new
                  (elementary) derivation of the convergence $\xi_t/\mu_t\to 1$ is
                  provided, which additionally gives the rate of convergence. A
                  general sequence predictor is allowed to choose an action $y_t$
                  based on $x_1...x_{t-1}$ and receives loss $\ell_{x_t y_t}$ if
                  $x_t$ is the next symbol of the sequence. No assumptions are made
                  on the structure of $\ell$ (apart from being bounded) and $M$.
                  The Bayes-optimal prediction scheme $\Lambda_\xi$ based on mixture
                  $\xi$ and the Bayes-optimal informed prediction scheme
                  $\Lambda_\mu$ are defined and the total loss $L_\xi$ of
                  $\Lambda_\xi$ is bounded in terms of the total loss $L_\mu$ of
                  $\Lambda_\mu$. It is shown that $L_\xi$ is bounded for bounded
                  $L_\mu$ and $L_\xi/L_\mu\to 1$ for $L_\mu\to \infty$. Convergence
                  of the instantaneous losses is also proven.",
}

%-------------Publications-of-Marcus-Hutter-2002--------------%

@InProceedings{Hutter:02feature,
  author =       "Marco Zaffalon and Marcus Hutter",
  title =        "Robust Feature Selection by Mutual Information Distributions",
  _month =        jun,
  year =         "2002",
  pages =        "577--584",
  booktitle =    "Proc. 18th International Conf. on
                  Uncertainty in Artificial Intelligence (UAI-2002)",
  _editor =       "A. Darwiche and N. Friedman",
  publisher =    "Morgan Kaufmann",
  address =      "San Francisco, CA",
  isbn =         "1-55860-897-4",
  http =         "http://www.hutter1.net/ai/feature.htm",
  url =          "http://arxiv.org/abs/cs.AI/0206006",
  categories =   "I.2.   [Artificial Intelligence]",
  keywords =     "Robust feature selection, Filter approach, naive Bayes classifier,
                  Mutual Information, Cross Entropy, Dirichlet distribution, Second
                  order distribution, Bayesian statistics, Credible intervals,
                  expectation and variance of mutual information, missing data.",
  abstract =     "Mutual information is widely used in artificial intelligence, in a
                  descriptive way, to measure the stochastic dependence of discrete random
                  variables. In order to address questions such as the reliability of the
                  empirical value, one must consider sample-to-population inferential
                  approaches. This paper deals with the distribution of mutual information, as
                  obtained in a Bayesian framework by a second-order Dirichlet prior
                  distribution. The exact analytical expression for the mean and an
                  analytical approximation of the variance are reported. Asymptotic
                  approximations of the distribution are proposed. The results are applied to
                  the problem of selecting features for incremental learning and
                  classification of the naive Bayes classifier. A fast, newly defined method
                  is shown to outperform the traditional approach based on empirical mutual
                  information on a number of real data sets. Finally, a theoretical
                  development is reported that allows one to efficiently extend the above
                  methods to incomplete samples in an easy and effective way.",
  znote =        "Acceptance rate: 66/192 = 34\%",
}
@InProceedings{Hutter:02selfopt,
  author =       "Marcus Hutter",
  title =        "Self-Optimizing and {Pareto}-Optimal Policies in
                  General Environments based on {Bayes}-Mixtures",
  _month =        jul,
  series =       "LNAI",
  volume =       "2375",
  year =         "2002",
  pages =        "364--379",
  address =      "Sydney, Australia",
  booktitle =    "Proc. 15th Annual Conf. on Computational Learning Theory ({COLT'02})",
  _editor =       "J. Kivinen and R. H. Sloan",
  publisher =    "Springer",
  isbn =         "978-3-540-43836-6",
  doi =          "10.1007/3-540-45435-7_25",
  http =         "http://www.hutter1.net/ai/selfopt.htm",
  url =          "http://arxiv.org/abs/cs.AI/0204040",
  keywords =     "Rational agents, sequential decision theory,
                  reinforcement learning, value function, Bayes mixtures,
                  self-optimizing policies, Pareto-optimality,
                  unbounded effective horizon, (non) Markov decision
                  processes.",
  abstract =     "The problem of making sequential decisions in unknown
                  probabilistic environments is studied. In cycle $t$ action $y_t$
                  results in perception $x_t$ and reward $r_t$, where all quantities
                  in general may depend on the complete history. The perception
                  $x_t$ and reward $r_t$ are sampled from the (reactive)
                  environmental probability distribution $\mu$. This very general
                  setting includes, but is not limited to, (partial observable, k-th
                  order) Markov decision processes. Sequential decision theory tells
                  us how to act in order to maximize the total expected reward,
                  called value, if $\mu$ is known. Reinforcement learning is usually
                  used if $\mu$ is unknown. In the Bayesian approach one defines a
                  mixture distribution $\xi$ as a weighted sum of distributions
                  $\nu\in\M$, where $\M$ is any class of distributions including the
                  true environment $\mu$. We show that the Bayes-optimal policy
                  $p^\xi$ based on the mixture $\xi$ is self-optimizing in the sense
                  that the average value converges asymptotically for all $\mu\in\M$
                  to the optimal value achieved by the (infeasible) Bayes-optimal
                  policy $p^\mu$ which knows $\mu$ in advance. We show that the
                  necessary condition that $\M$ admits self-optimizing policies at
                  all, is also sufficient. No other structural assumptions are made
                  on $\M$. As an example application, we discuss ergodic Markov
                  decision processes, which allow for self-optimizing policies.
                  Furthermore, we show that $p^\xi$ is Pareto-optimal in the sense
                  that there is no other policy yielding higher or equal value in
                  {\em all} environments $\nu\in\M$ and a strictly higher value in
                  at least one.",
  znote =        "Acceptance rate: 26/55 = 47\%",
}
@inproceedings{Hutter:01xentropy,
  author     = {Marcus Hutter},
  title      = {Distribution of Mutual Information},
  booktitle  = {Advances in Neural Information Processing Systems 14},
  _editor    = {T. G. Dietterich and S. Becker and Z. Ghahramani},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
  pages      = {399--406},
  _month     = dec,
  year       = {2002},
  isbn       = {0262042088},
  http       = {http://www.hutter1.net/ai/xentropy.htm},
  url        = {http://arxiv.org/abs/cs.AI/0112019},
  categories = {I.2.   [Artificial Intelligence]},
  keywords   = {Mutual Information, Cross Entropy, Dirichlet distribution, Second
                order distribution, expectation and variance of mutual
                information.},
  abstract   = {The mutual information of two random variables i and j with joint
                probabilities t_ij is commonly used in learning Bayesian nets as
                well as in many other fields. The chances t_ij are usually
                estimated by the empirical sampling frequency n_ij/n leading to a
                point estimate I(n_ij/n) for the mutual information. To answer
                questions like ``is I(n_ij/n) consistent with zero?'' or ``what is
                the probability that the true mutual information is much larger
                than the point estimate?'' one has to go beyond the point estimate.
                In the Bayesian framework one can answer these questions by
                utilizing a (second order) prior distribution p(t) comprising
                prior information about t. From the prior p(t) one can compute the
                posterior p(t|n), from which the distribution p(I|n) of the mutual
                information can be calculated. We derive reliable and quickly
                computable approximations for p(I|n). We concentrate on the mean,
                variance, skewness, and kurtosis, and non-informative priors. For
                the mean we also give an exact expression. Numerical issues and
                the range of validity are discussed.},
  znote      = {Acceptance rate: 196/660 = 30\%},
}
@inproceedings{Hutter:02fuss,
  author    = {Marcus Hutter},
  title     = {Fitness Uniform Selection to Preserve Genetic Diversity},
  booktitle = {Proc. 2002 Congress on Evolutionary Computation (CEC-2002)},
  publisher = {IEEE},
  address   = {Honolulu, HI, USA},
  pages     = {783--788},
  _month    = may,
  year      = {2002},
  issn      = {1098-7576},
  http      = {http://www.hutter1.net/ai/pfuss.htm},
  url       = {http://arxiv.org/abs/cs.AI/0103015},
  keywords  = {Evolutionary algorithms, fitness uniform selection strategy,
               preserve diversity, local optima, evolution,
               correlated recombination, crossover.},
  abstract  = {In evolutionary algorithms, the fitness of a population increases
               with time by mutating and recombining individuals and by a biased
               selection of more fit individuals. The right selection pressure is
               critical in ensuring sufficient optimization progress on the one
               hand and in preserving genetic diversity to be able to escape from
               local optima on the other. We propose a new selection scheme,
               which is uniform in the fitness values. It generates selection
               pressure towards sparsely populated fitness regions, not
               necessarily towards higher fitness, as is the case for all other
               selection schemes. We show that the new selection scheme can be
               much more effective than standard selection schemes.},
  znote     = {Acceptance rate: 264/372 = 71\%},
}
@article{Hutter:02fast,
  author    = {Marcus Hutter},
  title     = {The Fastest and Shortest Algorithm for All Well-Defined Problems},
  journal   = {International Journal of Foundations of Computer Science},
  publisher = {World Scientific},
  volume    = {13},
  number    = {3},
  pages     = {431--443},
  year      = {2002},
  http      = {http://www.hutter1.net/ai/pfastprg.htm},
  url       = {http://arxiv.org/abs/cs.CC/0206022},
  press     = {http://guide.supereva.it/c_/interventi/2001/04/38469.shtml},
  keywords  = {Acceleration, Computational Complexity,
               Algorithmic Information Theory, Kolmogorov Complexity, Blum's
               Speed-up Theorem, Levin Search.},
  abstract  = {An algorithm M is described that solves any well-defined problem
               p as quickly as the fastest algorithm computing a solution to
               p, save for a factor of 5 and low-order additive terms. M
               optimally distributes resources between the execution of provably
               correct p-solving programs and an enumeration of all proofs,
               including relevant proofs of program correctness and of time
               bounds on program runtimes. M avoids Blum's speed-up theorem by
               ignoring programs without correctness proof. M has broader
               applicability and can be faster than Levin's universal search, the
               fastest method for inverting functions save for a large
               multiplicative constant. An extension of Kolmogorov complexity and
               two novel natural measures of function complexity are used to show
               that the most efficient program computing some function f is
               also among the shortest programs provably computing f.},
}
@article{Hutter:02uspatent,
  author  = {Marcus Hutter},
  title   = {System and method for analysing and displaying two- or three-dimensional sets of data},
  journal = {{\rm BrainLAB}, US patent},
  volume  = {number US2002041701, pages 1--15},
  year    = {2002},
  url     = {http://l2.espacenet.com/espacenet/bnsviewer?CY=ep&LG=en&DB=EPD&PN=US2002041701&ID=US2002041701A1+I+},
}

%-------------Publications-of-Marcus-Hutter-2001--------------%

@article{Hutter:01eupatent,
  author  = {Marcus Hutter},
  title   = {{S}tufenfreie {D}arstellung von zwei- oder dreidimensionalen Datens{\"a}tzen durch kr{\"u}mmungsminimierende {V}erschiebung von {P}ixelwerten},
  journal = {{\rm BrainLAB}, EU patent},
  volume  = {number EP1184812, pages 1--19},
  year    = {2001},
  url     = {http://l2.espacenet.com/espacenet/bnsviewer?CY=ep&LG=en&DB=EPD&PN=EP1184812&ID=EP+++1184812A1+I+},
}
@InProceedings{Hutter:01market,
  author =       "Ivo Kwee and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title =        "Market-Based Reinforcement Learning in Partially Observable Worlds",
  address =      "Vienna, Austria",
  _month =        aug,
  year =         "2001",
  pages =        "865--873",
  booktitle =    "Proc. International Conf. on Artificial Neural Networks (ICANN-2001)",
  _journal =      "Artificial Neural Networks (ICANN-2001)",
  _editor =      "Georg Dorffner and Horst Bischof and Kurt Hornik",
  publisher =    "Springer",
  series =       "LNCS",
  volume =       "2130",
  http =         "http://www.hutter1.net/ai/pmarket.htm",
  url =          "http://arxiv.org/abs/cs.AI/0105025",
  categories =   "I.2.   [Artificial Intelligence]",
  keywords =     "Hayek system; reinforcement learning; partial observable environment",
  abstract =     "Unlike traditional reinforcement learning (RL), market-based
                  RL is in principle applicable to worlds described by partially
                  observable Markov Decision Processes (POMDPs), where an agent needs
                  to learn short-term memories of relevant previous events in order to
                  execute optimal actions.  Most previous work, however, has focused
                  on reactive settings (MDPs) instead of POMDPs.  Here we reimplement
                  a recent approach to market-based RL and for the first time evaluate
                  it in a toy POMDP setting.",
  znote =        "Acceptance rate: 171/300 = 57\%",
}
@InProceedings{Hutter:01loss,
  author =       "Marcus Hutter",
  title =        "General Loss Bounds for Universal Sequence Prediction",
  year =         "2001",
  pages =        "210--217",
  booktitle =    "Proc. 18th International Conf. on Machine Learning (ICML-2001)",
  address =      "Williamstown, MA",
  _editor =       "Carla E. Brodley and Andrea Pohoreckyj Danyluk",
  publisher =    "Morgan Kaufmann",
  isbn =         "1-55860-778-1",
  ISSN =         "1049-1910",
  http =         "http://www.hutter1.net/ai/ploss.htm",
  url =          "http://arxiv.org/abs/cs.AI/0101019",
  categories =   "I.2.   [Artificial Intelligence],
                  I.2.6. [Learning],
                  I.2.8. [Problem Solving, Control Methods and Search],
                  F.1.3. [Complexity Classes].",
  keywords =     "Bayesian and deterministic prediction; general loss function;
                  Solomonoff induction; Kolmogorov complexity; learning; universal
                  probability; loss bounds; games of chance; partial and delayed
                  prediction; classification.",
  abstract =     "The Bayesian framework is ideally suited for induction problems.
                  The probability of observing $x_k$ at time $k$, given past
                  observations $x_1...x_{k-1}$ can be computed with Bayes rule if
                  the true distribution $\mu$ of the sequences $x_1x_2x_3...$ is
                  known. The problem, however, is that in many cases one does not
                  even have a reasonable estimate of the true distribution. In order
                  to overcome this problem a universal distribution $\xi$ is defined
                  as a weighted sum of distributions $\mu_i\in M$, where $M$ is
                  any countable set of distributions including $\mu$. This is a
                  generalization of Solomonoff induction, in which $M$ is the set of
                  all enumerable semi-measures. Systems which predict $y_k$, given
                  $x_1...x_{k-1}$ and which receive loss $l_{x_k y_k}$ if $x_k$ is
                  the true next symbol of the sequence are considered. It is proven
                  that using the universal $\xi$ as a prior is nearly as good as
                  using the unknown true distribution $\mu$. Furthermore, games of
                  chance, defined as a sequence of bets, observations, and rewards
                  are studied. The time needed to reach the winning zone is
                  estimated. Extensions to arbitrary alphabets, partial and delayed
                  prediction, and more active systems are discussed.",
  znote =        "Acceptance rate: 80/249 = 32\%",
}
@InProceedings{Hutter:01alpha,
  author =       "Marcus Hutter",
  title =        "Convergence and Error Bounds for Universal Prediction of Nonbinary Sequences",
  booktitle =    "Proc. 12th European Conf. on Machine Learning (ECML-2001)",
  address =      "Freiburg, Germany",
  _editor =      "Luc De Raedt and Peter Flach",
  publisher =    "Springer",
  series =       "LNAI",
  volume =       "2167",
  isbn =         "3-540-42536-5",
  _month =        dec,
  year =         "2001",
  pages =        "239--250",
  http =         "http://www.hutter1.net/ai/palpha.htm",
  url =          "http://arxiv.org/abs/cs.LG/0106036",
  keywords =     "Induction; Solomonoff, Bayesian, deterministic
                  prediction; Kolmogorov complexity; learning; Loss function;
                  algorithmic information theory; universal probability",
  abstract =     "Solomonoff's uncomputable universal prediction scheme $\xi$ allows
                  to predict the next symbol $x_k$ of a sequence $x_1...x_{k-1}$ for
                  any Turing computable, but otherwise unknown, probabilistic
                  environment $\mu$. This scheme will be generalized to arbitrary
                  environmental classes, which, among others, allows the
                  construction of computable universal prediction schemes $\xi$.
                  Convergence of $\xi$ to $\mu$ in a conditional mean squared sense
                  and with $\mu$ probability $1$ is proven. It is shown that the
                  average number of prediction errors made by the universal $\xi$
                  scheme rapidly converges to those made by the best possible
                  informed $\mu$ scheme. The schemes, theorems and proofs are given
                  for general finite alphabet, which results in additional
                  complications as compared to the binary case.
                  Several extensions of the presented theory and
                  results are outlined. They include general loss functions and
                  bounds, games of chance, infinite alphabet, partial and delayed
                  prediction, classification, and more active
                  systems.",
  znote =        "Acceptance rate: 90/240 = 37\% (includes PKDD)",
}
@InProceedings{Hutter:01grep,
  author =       "Ivo Kwee and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title =        "Gradient-based Reinforcement Planning in Policy-Search Methods",
  year =         "2001",
  pages =        "27--29",
  address =      "Utrecht, The Netherlands",
  booktitle =    "Proc. 5th European Workshop on Reinforcement Learning (EWRL-5)",
  volume =       "27",
  _editor =       "Marco A. Wiering",
  publisher =    "Onderwijsinstituut CKI, Utrecht Univ.",
  _series =       "Cognitieve Kunstmatige Intelligentie",
  isbn =         "90-393-2874-9",
  ISSN =         "1389-5184",
  keywords =     "Artificial intelligence, reinforcement learning, direct policy search,
                  planning, gradient descent.",
  http =         "http://www.hutter1.net/ai/pgrep.htm",
  url =          "http://arxiv.org/abs/cs.AI/0111060",
  categories =   "I.2.   [Artificial Intelligence],
                  I.2.6. [Learning],
                  I.2.8. [Problem Solving, Control Methods and Search]",
  abstract =     "We introduce a learning method called ``gradient-based reinforcement
                  planning'' (GREP). Unlike traditional DP methods that improve their
                  policy backwards in time, GREP is a gradient-based method that plans
                  ahead and improves its policy {\em before} it actually acts in the
                  environment. We derive formulas for the exact policy gradient that
                  maximizes the expected future reward and confirm our ideas
                  with numerical experiments.",
}
@InProceedings{Hutter:01decision,
  author =       "Marcus Hutter",
  title =        "Universal Sequential Decisions in Unknown Environments",
  year =         "2001",
  pages =        "25--26",
  address =      "Utrecht, The Netherlands",
  booktitle =    "Proc. 5th European Workshop on Reinforcement Learning (EWRL-5)",
  volume =       "27",
  _editor =       "Marco A. Wiering",
  publisher =    "Onderwijsinstituut CKI, Utrecht Univ.",
  _series =       "Cognitieve Kunstmatige Intelligentie",
  isbn =         "90-393-2874-9",
  ISSN =         "1389-5184",
  keywords =     "Artificial intelligence, Rational agents,
                  sequential decision theory, universal Solomonoff induction,
                  algorithmic probability, reinforcement learning, computational
                  complexity, Kolmogorov complexity.",
  url =          "http://www.hutter1.net/ai/pdecision.htm",
  categories =   "I.2.   [Artificial Intelligence],
                  I.2.6. [Learning],
                  I.2.8. [Problem Solving, Control Methods and Search],
                  F.1.3. [Complexity Classes],
                  F.2.   [Analysis of Algorithms and Problem Complexity]",
  abstract =     "We give a brief introduction to the AIXI model, which unifies and
                  overcomes the limitations of sequential decision theory and
                  universal Solomonoff induction. While the former theory is suited
                  for active agents in known environments, the latter is suited for
                  passive prediction of unknown environments.",
  abstract2 =    "Decision theory formally solves the problem of rational agents in
                  uncertain worlds if the true environmental probability
                  distribution is known. Solomonoff's theory of universal induction
                  formally solves the problem of sequence prediction for unknown
                  distribution. We unify both theories and give strong arguments
                  that the resulting universal AIXI model behaves optimally in any
                  computable environment.",
}
@inproceedings{Hutter:01aixi,
  author     = {Marcus Hutter},
  title      = {Towards a Universal Theory of Artificial Intelligence based on Algorithmic
                Probability and Sequential Decisions},
  booktitle  = {Proc. 12th European Conf. on
                Machine Learning (ECML-2001)},
  _editor    = {Luc De Raedt and Peter Flach},
  publisher  = {Springer},
  address    = {Freiburg, Germany},
  series     = {LNAI},
  volume     = {2167},
  pages      = {226--238},
  year       = {2001},
  isbn       = {3-540-42536-5},
  http       = {http://www.hutter1.net/ai/paixi.htm},
  url        = {http://arxiv.org/abs/cs.AI/0012011},
  categories = {I.2.   [Artificial Intelligence],
                I.2.3. [Deduction and Theorem Proving],
                I.2.6. [Learning],
                I.2.8. [Problem Solving, Control Methods and Search],
                F.1.3. [Complexity Classes],
                F.2.   [Analysis of Algorithms and Problem Complexity]},
  keywords   = {Artificial intelligence, Rational agents,
                sequential decision theory, universal Solomonoff induction,
                algorithmic probability, reinforcement learning, computational
                complexity, theorem proving, probabilistic reasoning, Kolmogorov
                complexity, Levin search.},
  abstract   = {Decision theory formally solves the problem of rational agents in
                uncertain worlds if the true environmental probability
                distribution is known. Solomonoff's theory of universal induction
                formally solves the problem of sequence prediction for unknown
                distribution. We unify both theories and give strong arguments
                that the resulting universal AIXI model behaves optimally in any
                computable environment. The major drawback of the AIXI model is
                that it is uncomputable. To overcome this problem, we construct a
                modified algorithm AIXI^tl, which is still superior to any
                other time t and space l bounded agent. The computation time
                of AIXI^tl is of the order t x 2^l.},
  znote      = {Acceptance rate: 90/240 = 37\% (includes PKDD)},
}
@article{Hutter:01errbnd,
  author   = {Marcus Hutter},
  title    = {New Error Bounds for {Solomonoff} Prediction},
  journal  = {Journal of Computer and System Sciences},
  address  = {Manno(Lugano), Switzerland},
  volume   = {62},
  number   = {4},
  pages    = {653--667},
  year     = {2001},
  http     = {http://www.hutter1.net/ai/perrbnd.htm},
  url      = {http://arxiv.org/abs/cs.AI/9912008},
  keywords = {Kolmogorov Complexity, Solomonoff Prediction, Error
              Bound, Induction, Learning, Algorithmic Information
              Theory, Bayes},
  abstract = {Several new relations between Solomonoff prediction
              and Bayesian prediction and general probabilistic
              prediction schemes will be proved. Among others they
              show that the number of errors in Solomonoff prediction
              is finite for computable prior probability, if finite
              in the Bayesian case. Deterministic variants will also
              be studied. The most interesting result is that the
              deterministic variant of Solomonoff prediction is
              optimal compared to any other probabilistic or
              deterministic prediction scheme apart from additive
              square root corrections only. This makes it well suited
              even for difficult prediction problems, where it does
              not suffice when the number of errors is minimal to
              within some factor greater than one. Solomonoff's
              original bound and the ones presented here complement
              each other in a useful way.},
}

%-------------Publications-of-Marcus-Hutter-2000--------------%

% Workshop presentation on speeding up algorithms (preprint linked in `url`).
% NOTE(review): `journal` holds a workshop description rather than a journal
% name; a more specific entry type may fit once the venue is confirmed.
@Article{Hutter:00speed,
  author =       "Marcus Hutter",
  title =        "An effective Procedure for Speeding up Algorithms",
  year =         "2001",
  journal =      "Presented at the 3rd Workshop on Algorithmic Information Theory (TAI-2001)",
  http =         "http://www.hutter1.net/ai/pspeed.htm",
  url =          "http://arxiv.org/abs/cs.CC/0102018",
  keywords =     "Acceleration, Computational Complexity,
                  Algorithmic Information Theory, Blum's Speed-up, Levin Search.",
  abstract =     "The provably asymptotically fastest algorithm within a factor of 5
                  for formally described problems will be constructed. The main idea
                  is to enumerate all programs provably equivalent to the original
                  problem by enumerating all proofs. The algorithm could be
                  interpreted as a generalization and improvement of Levin search,
                  which is, within a multiplicative constant, the fastest algorithm
                  for inverting functions. Blum's speed-up theorem is avoided by
                  taking into account only programs for which a correctness proof
                  exists. Furthermore, it is shown that the fastest program that
                  computes a certain function is also one of the shortest programs
                  provably computing this function. To quantify this statement, the
                  definition of Kolmogorov complexity is extended, and two new
                  natural measures for the complexity of a function are defined.",
  note =         "10 pages",
}
% Technical report introducing the AIXI model; the report number is the arXiv identifier.
% `_month` is not a standard field name, so standard styles ignore it.
@TechReport{Hutter:00kcunai,
  author =       "Marcus Hutter",
  title  =       "A Theory of Universal Artificial Intelligence based on Algorithmic Complexity",
  number =       "cs.AI/0004001",
  _month =        apr,
  year =         "2000",
  institution =  "M{\"u}nchen",
  keywords =     "Artificial intelligence; algorithmic complexity;
                  sequential decision theory; induction; Solomonoff; Kolmogorov;
                  Bayes; reinforcement learning; universal sequence prediction;
                  strategic games; function minimization; supervised learning.",
  url =          "http://arxiv.org/abs/cs.AI/0004001",
  http =         "http://www.hutter1.net/ai/pkcunai.htm",
  abstract =     "Decision theory formally solves the problem of rational agents in
                  uncertain worlds if the true environmental prior probability
                  distribution is known. Solomonoff's theory of universal induction
                  formally solves the problem of sequence prediction for unknown
                  prior distribution. We combine both ideas and get a parameterless
                  theory of universal Artificial Intelligence. We give strong
                  arguments that the resulting AIXI model is the most intelligent
                  unbiased agent possible. We outline for a number of problem
                  classes, including sequence prediction, strategic games, function
                  minimization, reinforcement and supervised learning, how the
                  AIXI model can formally solve them. The major drawback of the
                  AIXI model is that it is uncomputable. To overcome this
                  problem, we construct a modified algorithm AIXI-tl, which is
                  still effectively more intelligent than any other time t and
                  space l bounded agent. The computation time of AIXI-tl
                  is of the order t x 2^l. Other discussed topics are formal
                  definitions of intelligence order relations, the horizon problem
                  and relations of the AIXI theory to other AI approaches.",
  note =         "62 pages, http://arxiv.org/abs/cs.AI/0004001",
}

%----------Publications-of-Marcus-Hutter-1987-1999------------%

% Journal article on QCD correlators in the instanton liquid model (preprint linked in `url`).
@Article{Hutter:97instanto,
  author =       "Marcus Hutter",
  title =        "Instantons and Meson Correlators in {QCD}",
  year =         "1997",
  pages =        "131--143",
  journal =      "Zeitschrift f{\"u}r Physik C Particles and Fields",
  volume =       "74",
  number =       "1",
  issn =         "0170-9739",
  doi =          "10.1007/s002880050376",
  url =          "http://arxiv.org/abs/hep-ph/9501245",
  http =         "http://www.hutter1.net/physics/pinstant.htm",
  abstract =     "Various QCD correlators are calculated in the instanton liquid model
                  in zeromode approximation and $1/N_c$ expansion. Previous works are
                  extended by including dynamical quark loops. In contrast to the
                  original ``perturbative'' $1/N_c$ expansion not all quark loops are
                  suppressed. In the flavor singlet meson correlators a chain of quark
                  bubbles survives the $N_c\to\infty$ limit causing a massive
                  $\eta^\prime$ in the pseudoscalar correlator while keeping massless
                  pions in the triplet correlator. The correlators are plotted and
                  meson masses and couplings are obtained from a spectral fit. They
                  are compared to the values obtained from numerical studies of the
                  instanton liquid and to experimental results.",
}
% Journal article; the `note` field points to where the missing figures appeared.
@Article{Hutter:97family,
  author =       "Andreas Blumhofer and Marcus Hutter",
  title =        "Family Structure from Periodic Solutions of an Improved Gap Equation",
  journal =      "Nuclear Physics B",
  volume =       "484",
  year =         "1997",
  pages =        "80--96",
  doi =          "10.1016/S0550-3213(96)00644-X",
  issn =         "0550-3213",
  url =          "http://arxiv.org/abs/hep-ph/9605393",
  http =         "http://www.hutter1.net/physics/pfamily.htm",
  abstract =     "Fermion mass models usually contain a horizontal symmetry and
                  therefore fail to predict the exponential mass spectrum of the Standard
                  Model in a natural way. In dynamical symmetry breaking there are
                  different concepts to introduce a fermion mass spectrum, which
                  automatically has the desired hierarchy. In constructing a specific
                  model we show that in some modified gap equations periodic solutions
                  with several fermion poles appear. The stability of these excitations
                  and the application of this toy model are discussed. The mass ratios
                  turn out to be approximately e^pi and e^2pi. Thus the model explains
                  the large ratios of fermion masses between successive generations in
                  the Standard Model without introducing large or small numbers by hand.",
  note =         "Missing figures in B494 (1997) 485",
}
% English translation of the PhD thesis; the `note` field links to the German original.
@PhdThesis{Hutter:96thesis,
  author =       "Marcus Hutter",
  school =       "Faculty for Theoretical Physics, LMU Munich",
  title =        "Instantons in {QCD}: Theory and application of the instanton liquid model",
  year =         "1996",
  pages =        "1--100",
  url =          "http://arxiv.org/abs/hep-ph/0107098",
  http =         "http://www.hutter1.net/physics/pdise.htm",
  abstract =     "Numerical and analytical studies of the instanton liquid model have
                  allowed the determination of many hadronic parameters during the
                  last 13 years. Most part of this thesis is devoted to the extension
                  of the analytical methods. The meson correlation (polarization)
                  functions are calculated in the instanton liquid model including
                  dynamical quark loops. The correlators are plotted and masses and
                  couplings of the sigma, rho, omega, a1 and f1 are obtained from a
                  spectral fit. A separated analysis allows the determination of the
                  eta' mass too. The results agree with the experimental values on
                  a 10\% level. Further I give some predictions for the proton form
                  factors, which are related to the proton spin (problem). A gauge
                  invariant gluon mass for small momenta is also calculated. At the
                  end of the work some predictions are given, which do not rely on
                  the instanton liquid model. A gauge invariant quark propagator is
                  calculated in the one instanton background and is compared to the
                  regular and singular propagator. An introduction to the skill of
                  choosing a suitable gauge, especially a criterion for choosing regular
                  or singular gauge, is given. An application is the derivation of a
                  finite relation between the quark condensate and the QCD scale Lambda,
                  where neither an infrared cutoff nor a specific instanton model has
                  been used. In general the instanton liquid model exhibits an astonishing
                  internal consistency and a good agreement with the experimental data.",
  note =         "Translated from the German original http://www.hutter1.net/physics/pdiss.htm",
}
% German original of the PhD thesis; the `note` field links to the English translation.
@PhdThesis{Hutter:96diss,
  author =       "Marcus Hutter",
  school =       "Fakult{\"a}t f{\"u}r Theoretische Physik, LMU M{\"u}nchen",
  title =        "Instantonen in der {QCD}: Theorie und {Anwendungen} des {Instanton-Fl{\"u}ssigkeit-Modells}",
  year =         "1996",
  pages =        "1--105",
  url =          "http://arxiv.org/abs/hep-ph/9603280",
  http =         "http://www.hutter1.net/physics/pdiss.htm",
  abstract =     "Durch numerische Simulation des Instanton-Flüssigkeit-Modells
                  konnten eine Reihe hadronischer Größen in den letzten 13 Jahren
                  bestimmt werden. Der größte Teil dieser Arbeit ist der Erweiterung
                  der analytischen Methoden gewidmet. Die Meson-Korrelatoren
                  (auch Polarisations-Funktionen genannt) werden im Instanton-Flüssigkeits-Modell
                  berechnet, wobei dynamische Quark-Schleifen berücksichtigt werden.
                  Die Korrelatoren werden grafisch dargestellt und die Massen und Kopplungen
                  der sigma, rho, omega, a1 und f1 Mesonen werden mit Hilfe eines spektralen
                  Fits bestimmt. Eine gesonderte Betrachtung ermöglicht auch die Berechnung
                  der eta' Masse. Die Ergebnisse stimmen auf 10\% Niveau mit den experimentellen
                  Werten überein. Weiterhin wird versucht, die axialen Formfaktoren des Protons
                  zu bestimmen. Diese stehen in Zusammenhang mit dem Proton-Spin(-Problem).
                  Eine eichinvariante Gluon-Masse wird für kleine Impulse berechnet.
                  Die Arbeit wird abgeschlossen mit einigen Vorhersagen, die sich nicht
                  speziell auf das Instanton-Flüssigkeits-Modell stützen. Im
                  ein-Instanton-Vakuum wird ein eichinvarianter Quark-Propagator berechnet
                  und mit dem regulären und dem singulären Propagator verglichen.
                  Kriterien für die Wahl einer geeigneten Eichung, insbesondere für die
                  Wahl der singulären oder der regulären Eichung, werden gegeben.
                  Eine Anwendung ist die Herleitung einer endlichen Relation zwischen
                  dem Quark-Kondensat und der QCD-Skala Lambda, wobei weder ein
                  Infrarot-Cutoff noch ein spezifisches Instanton-Modell verwendet werden.
                  Allgemein weist das Instanton-Flüssigkeits-Modell eine erstaunliche interne
                  Konsistenz und gute Übereinstimmung mit experimentellen Daten auf.",
  note =         "English translation available at http://www.hutter1.net/physics/pdise.htm",
}
% Journal letter relating the eta' mass to f_pi and the gluon condensate (preprint linked in `url`).
@Article{Hutter:96eta,
  author =       "Marcus Hutter",
  title =        "The mass of the {$\eta'$} in self-dual {QCD}",
  year =         "1996",
  pages =        "275--278",
  journal =      "Physics Letters B",
  volume =       "367",
  issn =         "0370-2693",
  doi =          "10.1016/0370-2693(95)01411-X",
  url =          "http://arxiv.org/abs/hep-ph/9509401",
  http =         "http://www.hutter1.net/physics/petamas.htm",
  abstract =     "The QCD gauge field is modeled as an ensemble of statistically
                  independent selfdual and antiselfdual regions. This model is
                  motivated from instanton physics. The scale anomaly then allows
                  to relate the topological susceptibility to the gluon condensate.
                  With the help of Witten's formula for m_eta' and an estimate of
                  the suppression of the gluon condensate due to light quarks the
                  mass of the eta' can be related to f_pi and the physical gluon
                  condensate. We get the quite satisfactory value m_eta'=884+-116 MeV.
                  Using the physical eta' mass as an input it is in principle possible
                  to get information about the interaction between instantons and
                  anti-instantons.",
}
% Technical report LMU-95-15 on proton form factors in the instanton liquid model.
@techreport{Hutter:95spin,
  author      = {Marcus Hutter},
  title       = {Proton Spin in the Instanton Background},
  institution = {Theoretische Physik, LMU M{\"u}nchen},
  number      = {LMU-95-15},
  year        = {1995},
  note        = {15 pages},
  url         = {http://arxiv.org/abs/hep-ph/9509402},
  http        = {http://www.hutter1.net/physics/pspin.htm},
  abstract    = {The proton form factors are reduced to vacuum correlators
                 of 4 quark fields by assuming independent constituent
                 quarks. The axial singlet quark and gluonic form factors
                 are calculated in the instanton liquid model. A discussion
                 of gauge(in)dependence is given.},
}
% Technical report LMU-95-03 on a gauge invariant quark propagator.
@techreport{Hutter:95prop,
  author      = {Marcus Hutter},
  title       = {Gauge Invariant Quark Propagator in the Instanton Background},
  institution = {Theoretische Physik, LMU M{\"u}nchen},
  number      = {LMU-95-03},
  year        = {1995},
  note        = {15 pages},
  url         = {http://arxiv.org/abs/hep-ph/9502361},
  http        = {http://www.hutter1.net/physics/pprop.htm},
  abstract    = {After a general discussion on the choice of gauge, we compare
                 the quark propagator in the background of one instanton in
                 regular and singular gauge with a gauge invariant propagator
                 obtained by inserting a path-ordered gluon exponential.
                 Using a gauge motivated by this analysis, we were able to
                 obtain a finite result for the quark condensate without
                 introducing an infrared cutoff nor invoking some instanton
                 model.},
}
% Technical report LMU-93-18 on the gluon mass in the instanton background.
@techreport{Hutter:93gluon,
  author      = {Marcus Hutter},
  title       = {Gluon Mass from Instantons},
  institution = {Theoretische Physik, LMU M{\"u}nchen},
  number      = {LMU-93-18},
  year        = {1993},
  note        = {13 pages},
  url         = {http://arxiv.org/abs/hep-ph/9501335},
  http        = {http://www.hutter1.net/physics/pgluon.htm},
  abstract    = {The gluon propagator is calculated in the instanton background
                 in a form appropriate for extracting the momentum dependent
                 gluon mass. In background-xi-gauge we get for the mass 400 MeV
                 for small p^2 independent of the gauge parameter xi.},
}
% Diploma thesis (in German) on a classifier system implementation.
% `abstract` is in English; `zusammenfassung` is a non-standard field holding the German text.
@MastersThesis{Hutter:91cfs,
  author =       "Marcus Hutter",
  school =       "Theoretische Informatik, TU M{\"u}nchen",
  title =        "Implementierung eines {Klassifizierungs-Systems}",
  year =         "1991",
  url =          "http://www.hutter1.net/ai/pcfs.htm",
  ps =           "http://www.hutter1.net/ai/pcfs.ps",
  pdf =          "http://www.hutter1.net/ai/pcfs.pdf",
  code =         "http://www.hutter1.net/ai/cfssim.c",
  codex =        "http://www.hutter1.net/ai/cfsexmpl.c",
  abstract =     "A classifier system is a massively parallel rule based system,
                  whose components (classifier) can exchange messages, whose behavior
                  is assessed by a teacher (reinforcement), and which is able to learn by
                  means of credit assignment and a genetic algorithm. For an introduction
                  we have to refer to the, meanwhile extensive, literature; see especially
                  Goldberg (1989). The concept of a classifier system was first developed
                  by Holland (1986), but meanwhile a multitude of variants and extensions
                  exist (Booker et al., 1989). So far it is impossible to
                  compare these variants in their performance, statements on the
                  quality of the various approaches are, hence, hard to impossible.
                  The program developed in this diploma thesis allows, for the first time,
                  a direct comparison of the most important variants.
                  The thesis describes the program, in which we have paid special attention
                  to an efficient implementation.",
  zusammenfassung = "Ein Klassifizierungssystem (CFS, engl. Classifiersystem) ist
                  ein massiv paralleles regelbasiertes System, dessen Komponenten
                  (Classifier) Nachrichten (Messages) austauschen können, dessen
                  Verhalten von einem Lehrer beurteilt wird (Reinforcement) und
                  das mittels Credit-Assignment und genetischen Algorithmen fähig
                  ist zu lernen. Für eine einführende Darstellung muß auf die
                  inzwischen sehr umfangreiche Literatur, insbesondere Goldberg (1989),
                  verwiesen werden. Das Konzept des CFS wurde zuerst von Holland (1986)
                  entwickelt, inzwischen gibt es aber eine Vielzahl von Varianten und
                  Erweiterungen (Booker et al., 1989). Bisher ist es nicht möglich,
                  diese Varianten in ihrer Performance zu vergleichen, eine Aussage
                  über die Güte der verschiedenen Ansätze ist somit kaum oder
                  überhaupt nicht möglich. Das in dieser Diplomarbeit erstellte
                  Programm gestattet erstmals bzgl. der wichtigsten Varianten einen
                  direkten Vergleich. In den folgenden Kapiteln wird dieses Programm,
                  bei dem besonders auf eine effiziente Implementierung geachtet wurde,
                  beschrieben.",
  note =         "72 pages with C listing, in German",
}
% Report (in German) on parallel algorithms in fluid mechanics; `type` overrides the report label.
@techreport{Hutter:90faka,
  author      = {Marcus Hutter},
  title       = {{P}arallele {A}lgorithmen in der {S}tr{\"o}mungsmechanik},
  type        = {{F}erienakademie: {N}umerische {M}ethoden der {S}tr{\"o}mungsmechanik},
  institution = {Universit{\"a}t Erlangen-N{\"u}rnberg \&
                 Technische Universit{\"a}t M{\"u}nchen},
  year        = {1990},
  note        = {10 pages, in German},
  url         = {http://www.hutter1.net/official/faka.htm},
}
% Lab-course report (in German) on reinforcement learning with Hebb nets; `type` overrides the report label.
@techreport{Hutter:90fopra,
  author      = {Marcus Hutter},
  title       = {A Reinforcement Learning {H}ebb Net},
  type        = {Fortgeschrittenenpraktikum},
  institution = {Theoretische Informatik, TU M{\"u}nchen},
  year        = {1990},
  note        = {30 pages with Pascal listing, in German},
  url         = {http://www.hutter1.net/ai/fopra.htm},
  ftp         = {http://www.hutter1.net/ai/fopra.ps.zip},
  pdf         = {http://www.hutter1.net/ai/fopra.pdf},
  code        = {http://www.hutter1.net/ai/fopra.pas},
  abstract    = {This Fopra is motivated by the following observations about
                 human learning and about human neural information processing.
                 On the one hand humans are able to learn supervised, unsupervised
                 and by reinforcement, on the other hand there is no neural
                 distinction between informative, uninformative and evaluative
                 feedback. Furthermore, the Hebb learning rule is the only
                 biological inspired learning mechanism. If the human brain
                 is indeed a Hebb net this would imply that Hebb nets are
                 able to learn by reinforcement. The goal of this Fopra is
                 to investigate whether and how Hebb nets could be used for
                 reinforcement learning. It is shown that Hebb nets with a
                 suitable prior net topology can indeed learn, at least
                 simple tasks, by reinforcement.},
}
% German-language article in a Happy Computer special issue (Sonderheft 16).
@Article{Hutter:87cad,
  author =       "Marcus Hutter",
  title =        "Fantastische {3D-Graphik} mit dem {CPC-Giga-CAD}",
  journal =      "7. Schneider Sonderheft, Happy Computer, Sonderheft 16",
  publisher =    "Markt\&Technik",
  year =         "1987",
  pages =        "41--92",
  url =          "http://www.hutter1.net/gigacad/gigacad.htm",
  abstract =     "CAD steht für Computer Aided Design. Bis heute war dieses
                  Gebiet hauptsächlich Domäne der Großrechner.
                  Mit $\gg$CPC-Giga-CAD$\ll$ wird auch auf dem Schneider CPC
                  automatisiertes und computergestütztes Zeichnen und
                  Konstruieren zum Kinderspiel.",
}
 © 2000 by ... [home] [search] [science] [contact] [up] ... Marcus Hutter