%-------------Publications-of-Marcus-Hutter-2024--------------%
@article{Hutter:24kernelait,
title={Bridging Algorithmic Information Theory and Machine Learning: A New Approach to Kernel Learning},
author={Hamzi, Boumediene and Hutter, Marcus and Owhadi, Houman},
journal={\href{http://arxiv.org/abs/2311.12624}{arXiv:2311.12624}},
_month=aug,
year={2024},
bibtex={http://www.hutter1.net/official/bib.htm#kernelait},
doi={10.1016/j.physd.2024.134153},
url={http://www.sciencedirect.com/journal/physica-d-nonlinear-phenomena/about/call-for-papers},
}
@inproceedings{Hutter:24trainsol,
title={Learning Universal Predictors},
author={Grau-Moya, Jordi and Genewein, Tim and Hutter, Marcus and Orseau, Laurent and Deletang, Gregoire and Catt, Elliot and Ruoss, Anian and Wenliang, Li Kevin and Mattern, Christopher and Aitchison, Matthew and Veness, Joel},
booktitle={Proc. 41st International Conference on Machine Learning ({ICML'24})},
_month=jul,
year={2024},
bibtex={http://www.hutter1.net/official/bib.htm#trainsol},
url={http://openreview.net/forum?id=B1ajnQyZgK},
arxiv={http://arxiv.org/abs/2401.14953},
press={http://medium.com/syncedreview/neural-networks-on-the-brink-of-universal-prediction-with-deepminds-cutting-edge-approach-2de9af5b4e3f},
}
@inproceedings{Hutter:24distrmerl,
title={Distributional Bellman Operators over Mean Embeddings},
author={Wenliang, Li Kevin and Deletang, Gregoire and Aitchison, Matthew and Hutter, Marcus and Ruoss, Anian and Gretton, Arthur and Rowland, Mark},
booktitle={Proc. 41st International Conference on Machine Learning ({ICML'24})},
_month=jul,
year={2024},
bibtex={http://www.hutter1.net/official/bib.htm#distrmerl},
url={http://openreview.net/forum?id=lGOPBl9tfb},
arxiv={http://arxiv.org/abs/2312.07358},
}
@Book{Hutter:24uaibook2,
author = "Marcus Hutter and David Quarel and Elliot Catt",
title = "An Introduction to Universal Artificial Intelligence",
series = "Chapman \& Hall/CRC Artificial Intelligence and Robotics Series",
publisher = "Taylor and Francis",
_month = may,
year = "2024",
isbn = "Paperback:9781032607023, Hardcover:9781032607153, eBook:9781003460299",
pages = "500",
bibtex = "http://www.hutter1.net/official/bib.htm#uaibook2",
doi = "10.1201/9781003460299",
_note = "500+ pages, http://www.hutter1.net/ai/uaibook2.htm",
url = "http://www.hutter1.net/ai/uaibook2.htm",
http = "http://www.routledge.com/An-Introduction-to-Universal-Artificial-Intelligence/Hutter-Catt-Quarel/p/book/9781032607023",
slides = "http://www.hutter1.net/ai/suaibook.pdf",
video = "http://cartesiancafe.podbean.com/e/marcus-hutter-universal-artificial-intelligence-and-solomonoff-induction/",
keywords = "Artificial general intelligence; algorithmic information theory;
Bayes mixture distributions; universal sequence prediction;
context tree weighting; rational agents; sequential decision theory;
universal intelligent agents; reinforcement learning;
games and multi-agent systems; approximation/implementation/application;
AGI-safety; philosophy of AI.",
abstract = "`An Introduction to Universal Artificial Intelligence'
provides the formal underpinning of what it means for an agent
to act intelligently in an unknown environment.
First presented in `Universal Algorithmic Intelligence' (Hutter, 2000),
UAI offers a framework in which virtually all AI problems can be formulated,
and a theory of how to solve them.
UAI unifies ideas from sequential decision theory,
Bayesian inference, and algorithmic information theory to construct AIXI,
an optimal reinforcement learning agent
that learns to act optimally in unknown environments.
AIXI is the theoretical gold standard for intelligent behavior.
The book covers both the theoretical and practical aspects of UAI.
Bayesian updating can be done efficiently with context tree weighting,
and planning can be approximated by sampling with Monte Carlo tree search.
It provides algorithms for the reader to implement,
and experimental results to compare against.
These algorithms are used to approximate AIXI.
The book ends with a philosophical discussion of Artificial General Intelligence:
Can super-intelligent agents even be constructed?
Is it inevitable that they will be constructed,
and what are the potential consequences?
This text is suitable for late undergraduate students.
It provides an extensive chapter to fill in the required
mathematics, probability, information,
and computability theory background.",
support = "ARC grant DP150104590",
for = "010404(20%),080101(20%),080198(20%),080299(10%),080401(30%)",
}
@InProceedings{Hutter:24lmiscompr,
author = "Gregoire Deletang and Anian Ruoss and Paul-Ambroise Duquenne and Elliot Catt and Tim Genewein and Christopher Mattern and Jordi Grau-Moya and Li Kevin Wenliang and Matthew Aitchison and Laurent Orseau and Marcus Hutter and Joel Veness",
title = "Language Modeling Is Compression",
booktitle = "Proc. 12th International Conference on Learning Representations ({ICLR'24})",
_number = "DM:rh/P39768",
address = "Vienna, Austria",
_month = may,
year = "2024",
bibtex = "http://www.hutter1.net/official/bib.htm#lmiscompr",
http = "http://openreview.net/forum?id=jznbgiynus",
arxiv = "http://arxiv.org/abs/2309.10668",
pdf = "http://www.hutter1.net/publ/lmiscompr.pdf",
slides = "http://www.hutter1.net/publ/slmiscompr.pdf",
project = "http://www.hutter1.net/official/projects.htm#infoth",
press = "http://twitter.com/search?q=%22language%20modeling%20is%20compression%22",
code = "http://www.hutter1.net/publ/lmiscompr.cpp",
keywords = "lossless compression; arithmetic coding; language models; scaling laws; in-context learning",
abstract = "It has long been established that predictive models can be transformed
into lossless compressors and vice versa. Incidentally, in recent years,
the machine learning community has focused on training increasingly large
and powerful self-supervised (language) models.
Since these large language models exhibit impressive predictive capabilities,
they are well-positioned to be strong compressors.
In this work, we advocate for viewing the prediction problem
through the lens of compression and evaluate the compression capabilities of large (foundation) models.
We show that large language models are powerful general-purpose predictors
and that the compression viewpoint provides novel insights into scaling laws,
tokenization, and in-context learning. For example, Chinchilla 70B,
while trained primarily on text, compresses ImageNet patches to 43.4\%
and LibriSpeech samples to 16.4\% of their raw size,
beating domain-specific compressors like PNG (58.5\%) or FLAC (30.3\%),
respectively. Finally, we show that the prediction-compression equivalence
allows us to use any compressor (like gzip) to build a conditional generative model.",
znote = "received over a million Twitter views and was the ``top ML paper of the week'':
http://twitter.com/search?q=%22language%20modeling%20is%20compression%22
Acceptance rate: 2251/7262=31\%",
}
@inproceedings{Hutter:24cnctrafo,
title={Generative Reinforcement Learning with Transformers},
author={Gregoire Deletang and Anian Ruoss and Li Kevin Wenliang and Elliot Catt and Tim Genewein and Jordi Grau and Marcus Hutter and Joel Veness},
_booktitle={Submitted to The Twelfth International Conference on Learning Representations},
_month=feb,
year={2024},
bibtex={http://www.hutter1.net/official/bib.htm#cnctrafo},
url={http://openreview.net/forum?id=6qtDu7hVPF},
_note={under review},
}
@inproceedings{Hutter:24truncvalf,
title={Policy Gradient without Bootstrapping via Truncated Value Learning},
author={Matthew Aitchison and Penny Sweetser and Gregoire Deletang and Marcus Hutter},
_booktitle={Submitted to The Twelfth International Conference on Learning Representations},
_month=feb,
year={2024},
bibtex={http://www.hutter1.net/official/bib.htm#truncvalf},
url={http://openreview.net/forum?id=nBYDP46s5N},
_note={under review},
}
@inproceedings{Hutter:24hedgeaixi,
title={Dynamic Knowledge Injection for {AIXI} Agents},
author={Yang-Zhao, Samuel and Ng, Kee Siong and Hutter, Marcus},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={38},
number={15},
pages={16388--16397},
_month=feb,
year={2024},
bibtex={http://www.hutter1.net/official/bib.htm#hedgeaixi},
doi={10.1609/aaai.v38i15.29575},
znote={Accept rate: 2342/9862=23.75\%},
}
%-------------Publications-of-Marcus-Hutter-2023--------------%
@InProceedings{Hutter:23onadallm,
title = "Revisiting Dynamic Evaluation: Online Adaptation for Large Language Models",
author= "Amal Rannen-Triki and Jorg Bornschein and Razvan Pascanu and Alexandre Galashov and Michalis Titsias and Marcus Hutter and Andras Gyorgy and Yee Whye Teh",
booktitle = "NeurIPS Workshop on Distribution Shifts: New Frontiers with Foundation Models",
_number = "DM:rh/P49871",
address = "New Orleans, USA",
_month = dec,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#onadallm",
http = "http://openreview.net/forum?id=iRz8qi7QB8",
url = "http://nips.cc/virtual/2023/80505",
pdf = "http://www.hutter1.net/publ/onadallm.pdf",
latex = "http://www.hutter1.net/publ/onadallm.tex",
slides = "http://www.hutter1.net/publ/sonadallm.pdf",
poster = "http://www.hutter1.net/publ/ponadallm.pdf",
video = "http://nips.cc/virtual/2023/80505",
project = "http://www.hutter1.net/official/projects.htm#nn",
keywords = "online learning; large language models; dynamic evaluation; context extension",
abstract = "We consider the problem of online finetuning the parameters of a
language model at test time, also known as dynamic evaluation.
While it is generally known that this approach improves the
overall predictive performance, especially when considering
distributional shift between training and evaluation data, we
here emphasize the perspective that online-adaptation turns
parameters into temporally changing states and provides a form
of context-length extension with memory in weights, more in line
with the concept of memory in neuroscience. We pay particular
attention to the speed of adaptation (in terms of sample
efficiency), sensitivity to overall distributional drift, and
computational overhead for performing gradient computation and
parameter updates. Our empirical study provides insights on when
online adaptation is particularly interesting. We highlight that
with online adaptation the conceptual distinction between
in-context learning and finetuning blurs: Both are methods to
condition the model on previously observed tokens.",
}
@InProceedings{Hutter:23selfaixi,
author = "Elliot Catt and Jordi Grau-Moya and Marcus Hutter and Matthew Aitchison and Tim Genewein and Gregoire Deletang and Li Kevin Wenliang and Joel Veness",
title = "Self-Predictive Universal {AI}",
booktitle = "37th Conf. on Neural Information Processing Systems ({NeurIPS'23})",
pages = "1--18",
_number = "DM:rh/P34416",
_editor = "",
address = "New Orleans, USA",
_month = dec,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#selfaixi",
http = "http://openreview.net/forum?id=psXVkKO9No",
pdf = "http://www.hutter1.net/publ/selfaixi.pdf",
poster = "http://www.hutter1.net/publ/pselfaixi.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
keywords = "reinforcement learning; universal; self-prediction; Bayes; planning; on-policy; intelligence; AIXI",
abstract = "Reinforcement Learning (RL) algorithms typically utilize
learning and/or planning techniques to derive effective
policies. Integrating both approaches has proven to be highly
successful in addressing complex sequential decision-making
challenges, as evidenced by algorithms such as AlphaZero and
MuZero, which consolidate the planning process into a parametric
search-policy. AIXI, the universal Bayes-optimal agent, leverages
planning through comprehensive search as its primary means to
find an optimal policy. Here we define an alternative universal
Bayesian agent, which we call Self-AIXI, that on the contrary to
AIXI, maximally exploits learning to obtain good policies. It
does so by self-predicting its own stream of action data, which
is generated, similarly to other TD(0) agents, by taking an
action maximization step over the current on-policy (universal
mixture-policy) Q-value estimates. We prove that Self-AIXI
converges to AIXI, and inherits a series of properties like
maximal Legg-Hutter intelligence and the self-optimizing
property.",
support = "ARC grant DP150104590",
znote = "Acceptance rate: 3218/12343 = 26\%",
}
@Article{Hutter:23lscm,
author = "Laurent Orseau and Marcus Hutter",
title = "Line Search for Convex Minimization",
journal = "\href{http://arxiv.org/abs/2307.16560}{arXiv:2307.16560}",
pages = "1--19",
_month = jul,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#lscm",
pdf = "http://www.hutter1.net/publ/lscm.pdf",
project = "http://www.hutter1.net/official/projects.htm#opt",
keywords = "optimization; quasiconvex; bisection; secant; gap; guarantees",
abstract = "We propose two algorithms: $\Delta$-Bisection is a variant of
bisection search that uses (sub)gradient information and
convexity to speed up convergence, while $\Delta$-Secant is a
variant of golden-section search and uses only function queries.
Both algorithms are based on a refined definition of the
\emph{optimality region} $\Delta$ containing the minimum point,
for general convex functions. While bisection search reduces the
$x$ interval by a factor 2 at every iteration,
$\Delta$-Bisection reduces the (sometimes much) smaller
$x^*$-gap $\Delta^x$ (the $x$ coordinates of $\Delta$) by
\emph{at least} a factor 2 at every iteration. Similarly,
$\Delta$-Secant also reduces the $x^*$-gap by at least a factor
2 every second function query. Moreover, and possibly more
importantly, the $y^*$-gap $\Delta^y$ (the $y$ coordinates of
$\Delta$) also provides a refined stopping criterion, which can
also be used with other algorithms. Experiments on a few convex
functions confirm that our algorithms are always faster than
their quasiconvex counterparts, often by more than a factor 2.
We further design a \emph{quasi-exact} line search algorithm
based on $\Delta$-Secant. It can be used with gradient descent
as a replacement for backtracking line search. We also provide
convergence guarantees.",
}
@InProceedings{Hutter:23ltscm,
author = "Laurent Orseau and Marcus Hutter and Levi HS Lelis",
title = "Levin Tree Search with Context Models",
booktitle = "Proc. 32nd International Joint Conference on Artificial Intelligence ({IJCAI'23})",
_number = "DM:rh/P21589",
pages = "5622--5630",
_editor = "",
address = "Macao, China",
_month = aug,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#ltscm",
arxiv = "\href{http://arxiv.org/abs/2305.16945}{arXiv:2305.16945}",
pdf = "http://www.hutter1.net/publ/ltscm.pdf",
slides = "http://www.hutter1.net/publ/sltscm.pdf",
poster = "http://www.hutter1.net/publ/pltscm.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
code = "http://github.com/google-deepmind/levintreesearch_cm",
doi = "10.24963/ijcai.2023/624",
keywords = "universal search; context; tree; Rubik's cube; Sokoban; convexity",
abstract = "Levin Tree Search (LTS) is a search algorithm that makes use of
a policy (a probability distribution over actions) and comes
with a theoretical guarantee on the number of expansions before
reaching a goal node, depending on the quality of the policy.
This guarantee can be used as a loss function, which we call the
LTS loss, to optimize neural networks representing the policy
(LTS+NN). In this work we show that the neural network can be
substituted with parameterized context models originating from
the online compression literature (LTS+CM). We show that the LTS
loss is convex under this new model, which allows for using
standard convex optimization tools, and obtain convergence
guarantees to the optimal parameters in an online setting for a
given set of solution trajectories -- guarantees that cannot be
provided for neural networks. The new LTS+CM algorithm compares
favorably against LTS+NN on several benchmarks: Sokoban
(Boxoban), The Witness, and the 24-Sliding Tile puzzle (STP).
The difference is particularly large on STP, where LTS+NN fails
to solve most of the test instances while LTS+CM solves each
test instance in a fraction of a second. Furthermore, we show
that LTS+CM is able to learn a policy that solves the Rubik's
cube in only a few hundred expansions, which considerably
improves upon previous machine learning techniques.",
note = "\href{http://ijcai-23.org/distinguished-paper-awards/}{Distinguished paper award}",
}
@InProceedings{Hutter:23atari5,
author = "Matthew Aitchison and Penny Sweetser and Marcus Hutter",
title = "Atari-5: Distilling the Arcade Learning Environment down to Five Games",
booktitle = "Proc. 40th International Conference on Machine Learning ({ICML'23})",
_number = "DM:rh/P?????",
volume = "202",
pages = "421--438",
_editor = "Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett",
publisher = "PMLR",
address = "Hawaii, USA",
_month = jul,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#atari5",
http = "http://proceedings.mlr.press/v202/aitchison23a.html",
url = "http://openreview.net/forum?id=xRDHjO0YBo",
arxiv = "\href{http://arxiv.org/abs/2210.02019}{arXiv:2210.02019}",
pdf = "http://proceedings.mlr.press/v202/aitchison23a/aitchison23a.pdf",
slides = "http://www.hutter1.net/publ/satari5.pdf",
poster = "http://www.hutter1.net/publ/patari5.pdf",
project = "http://www.hutter1.net/official/projects.htm#ai",
code = "http://www.hutter1.net/publ/atari5.cpp",
keywords = "Atari; ALE; benchmark; RL; AGI; efficiency; evaluation",
abstract = "The Arcade Learning Environment (ALE) has become an essential
benchmark for assessing the performance of reinforcement
learning algorithms. However, the computational cost of
generating results on the entire 57-game dataset limits ALE's
use and makes the reproducibility of many results infeasible. We
propose a novel solution to this problem in the form of a
principled methodology for selecting small but representative
subsets of environments within a benchmark suite. We applied our
method to identify a subset of five ALE games, we call Atari-5,
which produces 57-game median score estimates within 10\% of
their true values. Extending the subset to 10-games recovers
80\% of the variance for log-scores for all games within the
57-game set. We show this level of compression is possible due to
a high degree of correlation between many of the games in ALE.",
znote = "Acceptance rate: 1827/6538 = 28\%",
}
@InProceedings{Hutter:23nnptw,
author = "Tim Genewein and Gregoire Deletang and Anian Ruoss and Li Kevin Wenliang and Elliot Catt and Vincent Dutordoir and Jordi Grau-Moya and Laurent Orseau and Marcus Hutter and Joel Veness",
title = "Memory-Based Meta-Learning on Non-Stationary Distributions",
booktitle = "Proc. 40th International Conference on Machine Learning ({ICML'23})",
_number = "DM:rh/P23814",
volume = "202",
pages = "11173--11195",
_editor = "Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett",
publisher = "PMLR",
address = "Hawaii, USA",
_month = jul,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#nnptw",
http = "http://proceedings.mlr.press/v202/genewein23a.html",
url = "http://openreview.net/forum?id=gyHGzyIuEJ",
arxiv = "\href{http://arxiv.org/abs/2302.03067}{arXiv:2302.03067}",
pdf = "http://proceedings.mlr.press/v202/genewein23a/genewein23a.pdf",
slides = "http://www.hutter1.net/publ/snnptw.pdf",
poster = "http://www.hutter1.net/publ/pnnptw.pdf",
project = "http://www.hutter1.net/official/projects.htm#nn",
keywords = "Partition Tree Weighting; in-context learning; non-stationary; RNN; LSTM; Transformer; Bayes-optimal",
abstract = "Memory-based meta-learning is a technique for approximating
Bayes-optimal predictors. Under fairly general conditions,
minimizing sequential prediction error, measured by the log
loss, leads to implicit meta-learning. The goal of this work is
to investigate how far this interpretation can be realized by
current sequence prediction models and training regimes. The
focus is on piecewise stationary sources with unobserved
switching-points, which arguably capture an important
characteristic of natural language and action-observation
sequences in partially observable environments. We show that
various types of memory-based neural models, including
Transformers, LSTMs, and RNNs can learn to accurately
approximate known Bayes-optimal algorithms and behave as if
performing Bayesian inference over the latent switching-points
and the latent parameters governing the data distribution within
each segment.",
znote = "Acceptance rate: 1827/6538 = 28\%",
}
@article{Hutter:23potmmcp,
author = "Jonathon Schwartz and Hanna Kurniawati and Marcus Hutter",
title = "Combining a Meta-Policy and Monte-Carlo Planning for Scalable Type-Based Reasoning in Partially Observable Environments",
journal = "\href{http://arxiv.org/abs/2306.06067}{arXiv:2306.06067}",
_number = "DM:rh/P29163",
pages = "1--24",
_month = jun,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#potmmcp",
pdf = "http://www.hutter1.net/publ/potmmcp.pdf",
slides = "http://www.hutter1.net/publ/spotmmcp.pdf",
poster = "http://dl.acm.org/doi/10.5555/3545946.3598932",
project = "http://www.hutter1.net/official/projects.htm#uai",
keywords = "Multi-Agent; POSG; Type-Based Reasoning; Planning under Uncertainty; MCTS",
abstract = "The design of autonomous agents that can interact effectively
with other agents without prior coordination is a core problem
in multi-agent systems. Type-based reasoning methods achieve
this by maintaining a belief over a set of potential behaviours
for the other agents. However, current methods are limited in
that they assume full observability of the state and actions of
the other agent or do not scale efficiently to larger problems
with longer planning horizons. Addressing these limitations, we
propose Partially Observable Type-based Meta Monte-Carlo
Planning (POTMMCP) - an online Monte-Carlo Tree Search based
planning method for type-based reasoning in large partially
observable environments. POTMMCP incorporates a novel
meta-policy for guiding search and evaluating beliefs, allowing
it to search more effectively to longer horizons using less
planning time. We show that our method converges to the optimal
solution in the limit and empirically demonstrate that it
effectively adapts online to diverse sets of other agents across
a range of environments. Comparisons with the state-of-the art
method on problems with up to $10^{14}$ states and $10^8$
observations indicate that POTMMCP is able to compute better
solutions significantly faster.",
overleaf = "http://www.overleaf.com/project/63c934e3aecfa447d7c14968",
}
@InProceedings{Hutter:23nnchomsky,
author = "Gregoire Deletang and Anian Ruoss and Jordi Grau-Moya and Tim Genewein and Li Kevin Wenliang and Elliot Catt and Chris Cundy and Marcus Hutter and Shane Legg and Joel Veness and Pedro A Ortega",
title = "Neural Networks and the {Chomsky} Hierarchy",
booktitle = "Proc. 11th International Conference on Learning Representations ({ICLR'23})",
_number = "DM:rh/P5637",
address = "Kigali, Rwanda",
_month = may,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#nnchomsky",
http = "http://openreview.net/forum?id=WbxHAzkeQcn",
arxiv = "http://arxiv.org/abs/2207.02098",
pdf = "http://www.hutter1.net/publ/nnchomsky.pdf",
slides = "http://www.hutter1.net/publ/snnchomsky.pdf",
project = "http://www.hutter1.net/official/projects.htm#nn",
press = "Trended SuperHot in deeplearn.org in Jul'2022.
23Jan23 Number 9 on hacker news http://news.ycombinator.com/item?id=34485631",
keywords = "length generalization; memory-augmented neural networks; recurrent neural networks",
abstract = "Reliable generalization lies at the heart of safe ML and AI.
However, understanding when and how neural networks generalize
remains one of the most important unsolved problems in the
field. In this work, we conduct an extensive empirical study
(20'910 models, 15 tasks) to investigate whether insights from
the theory of computation can predict the limits of neural
network generalization in practice. We demonstrate that grouping
tasks according to the Chomsky hierarchy allows us to forecast
whether certain architectures will be able to generalize to
out-of-distribution inputs. This includes negative results where
even extensive amounts of data and training time never lead to
any non-trivial generalization, despite models having sufficient
capacity to fit the training data perfectly. Our results show
that, for our subset of tasks, RNNs and Transformers fail to
generalize on non-regular tasks, LSTMs can solve regular and
counter-language tasks, and only networks augmented with
structured memory (such as a stack or memory tape) can
successfully generalize on context-free and context-sensitive
tasks.",
znote = "(Spotlight) acceptance rate: (400)1590/5000 = (8\%)32\%",
}
@inproceedings{Hutter:23agmix,
title={Universal Agent Mixtures and the Geometry of Intelligence},
author={Alexander, Samuel Allen and Quarel, David and Du, Len and Hutter, Marcus},
booktitle={International Conference on Artificial Intelligence and Statistics},
pages={4231--4246},
_month=apr,
year={2023},
bibtex={http://www.hutter1.net/official/bib.htm#agmix},
organization={PMLR},
url={http://proceedings.mlr.press/v206/alexander23a.html},
arxiv={http://arxiv.org/abs/2302.06083},
video={http://youtu.be/yfzXvkZx2pw},
}
@misc{Hutter:23glcbpatent,
title={Gated linear contextual bandits},
author={Sezener, Eren and Veness, Joel William and Hutter, Marcus and Wang, Jianan and Budden, David},
_month=mar,
year={2023},
bibtex={http://www.hutter1.net/official/bib.htm#glcbpatent},
publisher={Google Patents},
note={US Patent App. 17/766,854},
}
@InProceedings{Hutter:23switchmdl,
author = "Yazhe Li and Jorg Bornschein and Marcus Hutter",
title = "Evaluating Representations with Readout Model Switching",
booktitle = "11th International Conference on Learning Representations",
_month = mar,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#switchmdl",
url = "http://openreview.net/forum?id=Fsd-6ax4T1m",
}
@InProceedings{Hutter:23preqnn,
author = "Bornschein, Jorg and Li, Yazhe and Hutter, Marcus",
title = "Sequential Learning of Neural Networks for Prequential {MDL}",
booktitle = "11th International Conference on Learning Representations",
_month = mar,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#preqnn",
url = "http://openreview.net/forum?id=h0MfjMHHNr",
}
@Article{Hutter:23uclip,
author = "Elesedy, Bryn and Hutter, Marcus",
title = "U-Clip: On-Average Unbiased Stochastic Gradient Clipping",
journal = "\href{http://arxiv.org/abs/2302.02971}{arXiv:2302.02971}",
_month = feb,
year = "2023",
bibtex = "http://www.hutter1.net/official/bib.htm#uclip",
}
%-------------Publications-of-Marcus-Hutter-2022--------------%
@article{Hutter:22ncollapsex,
title={Generalization bounds for transfer learning with pretrained classifiers},
author={Galanti, Tomer and Gyorgy, Andras and Hutter, Marcus},
journal={\href{http://arxiv.org/abs/2212.12532}{arXiv:2212.12532}},
_month=dec,
year={2022},
bibtex={http://www.hutter1.net/official/bib.htm#ncollapsex},
}
@misc{Hutter:22agiwirehs,
title={The Danger of Advanced Artificial Intelligence Controlling Its Own Feedback},
author={Michael K. Cohen and Marcus Hutter},
year={2022},
bibtex={http://www.hutter1.net/official/bib.htm#agiwirehs},
_month=oct,
journal={The Conversation},
howpublished={http://theconversation.com/the-danger-of-advanced-artificial-intelligence-controlling-its-own-feedback-190445},
}
@article{Hutter:22bbayes,
title={Beyond {Bayes}-optimality: meta-learning what you know you don't know},
author={Grau-Moya, Jordi and Deletang, Gregoire and Kunesch, Markus and Genewein, Tim and Catt, Elliot and Li, Kevin and Ruoss, Anian and Cundy, Chris and Veness, Joel and Wang, Jane and others},
journal={\href{http://arxiv.org/abs/2209.15618}{arXiv:2209.15618}},
_month=sep,
year={2022},
bibtex={http://www.hutter1.net/official/bib.htm#bbayes},
arxiv={http://arxiv.org/abs/2209.15618},
znote={Rank 1 at deeplearn.org, SuperHot for 2+ weeks,
Front page of HackerNews: http://news.ycombinator.com/item?id=32163324
http://syncedreview.com/2022/07/25/deepmind-paper-provides-a-mathematically-precise-overview-of-transformer-architectures-and-algorithms/},
}
@article{Hutter:22agiwireh,
title={Advanced artificial agents intervene in the provision of reward},
author={Cohen, Michael and Hutter, Marcus and Osborne, Michael},
journal={AI magazine},
volume={43},
number={3},
pages={282--293},
_month=aug,
year={2022},
bibtex={http://www.hutter1.net/official/bib.htm#agiwireh},
doi={10.1002/aaai.12064},
}
@InProceedings{Hutter:22binaixi,
author = "Elliot Catt and Marcus Hutter and Joel Veness",
title = "On Reward Binarisation and {Bayesian} Agents",
booktitle = "15th European Workshop on Reinforcement Learning ({EWRL-15})",
_month = sep,
year = "2022",
url = "http://ewrl.files.wordpress.com/2022/09/ewrl22_submission.pdf",
pdf = "http://www.hutter1.net/publ/binaixi.pdf",
keywords = "",
note = "\url{http://ewrl.files.wordpress.com/2022/09/ewrl22_submission.pdf}",
}
@InProceedings{Hutter:22compcon,
author = "Elliot Catt and Marcus Hutter and Joel Veness",
title = "Reinforcement Learning with Information-Theoretic Actuation",
booktitle = "Proc. 15th International Conference on Artificial General Intelligence ({AGI'22})",
series = "LNCS",
volume = "13539",
pages = "188--198",
_editor = "Ben Goertzel and Matt Iklé and Alexey Potapov and Denis Ponomaryov",
publisher = "Springer",
address = "Seattle, WA, USA",
_month = aug,
year = "2022",
bibtex = "http://www.hutter1.net/official/bib.htm#compcon",
url = "http://arxiv.org/abs/2109.15147",
pdf = "http://www.hutter1.net/publ/compcon.pdf",
slides = "http://www.hutter1.net/publ/scompcon.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
isbn = "978-3-031-19906-6",
doi = "10.1007/978-3-031-19907-3_18",
keywords = "Reinforcement Learning; large action spaces; compression; coding; internal actions; sampling.",
abstract = "Reinforcement Learning formalises an embodied agent's
interaction with the environment through observations, rewards
and actions. But where do the actions come from? Actions are
often considered to represent something external, such as the
movement of a limb, a chess piece, or more generally, the output
of an actuator. In this work we explore and formalize a
contrasting view, namely that actions are best thought of as the
output of a sequence of internal choices with respect to an
action model. This view is particularly well-suited for
leveraging the recent advances in large sequence models as prior
knowledge for multi-task reinforcement learning problems. Our
main contribution in this work is to show how to augment the
standard MDP formalism with a sequential notion of internal
action using information-theoretic techniques, and that this
leads to self-consistent definitions of both internal and
external action value functions.",
support = "ARC grant DP150104590",
for = "461105(50%),461301(50%)",
}
@inproceedings{Hutter:22ncollapse2,
  author    = {Tomer Galanti and Andras Gyorgy and Marcus Hutter},
  title     = {Improved Generalization Bounds for Transfer Learning via Neural Collapse},
  booktitle = {First Workshop on Pre-training: Perspectives, Pitfalls, and Paths Forward at ICML 2022},
  _number   = {DM:rh/P6844},
  pages     = {1--6},
  address   = {Baltimore, MD, USA},
  _month    = jul,
  year      = {2022},
  bibtex    = {http://www.hutter1.net/official/bib.htm#ncollapse2},
  url       = {http://openreview.net/forum?id=VrK7pKwOhT_},
  pdf       = {http://www.hutter1.net/publ/ncollapse2.pdf},
  latex     = {http://www.hutter1.net/publ/ncollapse2.tex},
  slides    = {http://www.hutter1.net/publ/sncollapse2.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#nn},
  keywords  = {neural collapse; transfer learning; classification},
  abstract  = {Using representations learned by large, pretrained models,
    also called foundation models, in new tasks with fewer data
    has been successful in a wide range of machine learning problems.
    Recently, Galanti et al. (2022) introduced a theoretical framework
    for studying this transfer learning setting for classification.
    Their analysis is based on the recently observed phenomenon
    that the features learned by overparameterized deep classification networks
    show an interesting clustering property, called neural collapse (Papyan et al. 2020).
    A cornerstone of their analysis demonstrates that neural collapse
    generalizes from the source classes to new target classes.
    However, this analysis is limited as it relies on several unrealistic assumptions.
    In this work, we provide an improved theoretical analysis
    significantly relaxing these modeling assumptions.},
}
@techreport{Hutter:22exiid,
  author      = {Marcus Hutter},
  title       = {Testing Independence of Exchangeable Random Variables},
  institution = {DeepMind},
  address     = {London, UK},
  _month      = oct,
  year        = {2022},
  bibtex      = {http://www.hutter1.net/official/bib.htm#exiid},
  http        = {http://arxiv.org/abs/2210.12392},
  pdf         = {http://www.hutter1.net/publ/exiid.pdf},
  slides      = {http://www.hutter1.net/publ/sexiid.pdf},
  video       = {http://youtu.be/xb1ZYG4rP_Y},
  project     = {http://www.hutter1.net/official/projects.htm#stat},
  keywords    = {independent; identically distributed; exchangeable
    random variables; statistical tests; unstructured data.},
  abstract    = {Given well-shuffled data, can we determine whether the data
    items are statistically (in)dependent? Formally, we consider the
    problem of testing whether a set of exchangeable random
    variables are independent. We will show that this is possible
    and develop tests that can confidently reject the null
    hypothesis that data is independent and identically distributed
    and have high power for (some) exchangeable distributions. We
    will make no structural assumptions on the underlying sample
    space. One potential application is in Deep Learning, where data
    is often scraped from the whole internet, with duplications
    abound, which can render data non-iid and test-set evaluation
    prone to give wrong answers.},
}
@Article{Hutter:22clogames,
author = "Mikael Böörs and Tobias Wängberg and Tom Everitt and Marcus Hutter",
title = "Classification by Decomposition: A Novel Approach to Classification of Symmetric 2 x 2 Games",
journal = "Theory and Decision",
volume = "23",
number = "3",
pages = "463--508",
publisher = "Springer",
_month = oct,
year = "2022",
bibtex = "http://www.hutter1.net/official/bib.htm#clogames",
url = "http://link.springer.com/article/10.1007/s11238-021-09850-z",
pdf = "http://www.hutter1.net/publ/clogames.pdf",
project = "http://www.hutter1.net/official/projects.htm#agents",
issn = "1573-7187",
doi = "10.1007/s11238-021-09850-z",
keywords = "Classification; Symmetric games; 2×2 Games; Decomposition; Cooperation and conflict; Simplicity",
abstract = "In this paper, we provide a detailed review of previous
classifications of 2×2 games and suggest a mathematically simple
way to classify the symmetric 2×2 games based on a decomposition
of the payoff matrix into a cooperative and a zero-sum part. We
argue that differences in the interaction between the parts is
what makes games interesting in different ways. Our claim is
supported by evolutionary computer experiments and findings in
previous literature. In addition, we provide a method for using
a stereographic projection to create a compact 2-d
representation of the game space.",
for = "460202(33%),380303(33%),460209(33%)",
}
@article{Hutter:22oimilearn,
  author    = {Michael K. Cohen and Marcus Hutter and Neel Nanda},
  title     = {Fully General Online Imitation Learning},
  journal   = {Journal of Machine Learning Research},
  volume    = {23},
  number    = {334},
  pages     = {1--30},
  publisher = {Microtome},
  _month    = oct,
  year      = {2022},
  bibtex    = {http://www.hutter1.net/official/bib.htm#oimilearn},
  http      = {http://www.jmlr.org/papers/v23/21-0618.html},
  url       = {http://arxiv.org/abs/2102.08686},
  pdf       = {http://www.hutter1.net/publ/oimilearn.pdf},
  slides    = {http://www.hutter1.net/publ/soimilearn.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#safe},
  keywords  = {Bayesian sequence prediction; imitation learning; active learning; general environments},
  abstract  = {In imitation learning, imitators and demonstrators are policies
    for picking actions given past interactions with the
    environment. If we run an imitator, we probably want events to
    unfold similarly to the way they would have if the demonstrator
    had been acting the whole time. No existing work provides formal
    guidance in how this might be accomplished, instead restricting
    focus to environments that restart, making learning unusually
    easy, and conveniently limiting the significance of any mistake.
    We address a fully general setting, in which the (stochastic)
    environment and demonstrator never reset, not even for training
    purposes. Our new conservative Bayesian imitation learner
    underestimates the probabilities of each available action, and
    queries for more data with the remaining probability. Our main
    result: if an event would have been unlikely had the
    demonstrator acted the whole time, that event's likelihood can
    be bounded above when running the (initially totally ignorant)
    imitator instead. Meanwhile, queries to the demonstrator rapidly
    diminish in frequency.},
  support   = {ARC grant DP150104590},
}
@techreport{Hutter:22transalg,
  author      = {Mary Phuong and Marcus Hutter},
  title       = {Formal Algorithms for Transformers},
  institution = {DeepMind},
  address     = {London, UK},
  _month      = jul,
  year        = {2022},
  bibtex      = {http://www.hutter1.net/official/bib.htm#transalg},
  http        = {http://arxiv.org/abs/2207.09238},
  pdf         = {http://www.hutter1.net/publ/transalg.pdf},
  project     = {http://www.hutter1.net/official/projects.htm#nn},
  keywords    = {formal algorithms, pseudocode, transformers, attention, encoder,
    decoder, BERT, GPT, Gopher, tokenization, training, inference.},
  abstract    = {This document aims to be a self-contained, mathematically
    precise overview of transformer architectures and algorithms
    (\emph{not} results). It covers what transformers are, how they
    are trained, what they are used for, their key architectural
    components, and a preview of the most prominent models. Complete
    pseudocode is provided. The reader is assumed to be familiar
    with basic ML terminology and simpler neural network
    architectures such as MLPs.},
  note        = {LaTeX source available at http://arxiv.org/abs/2207.09238},
}
@inproceedings{Hutter:22ncollapse,
  author    = {Tomer Galanti and Andras Gyorgy and Marcus Hutter},
  title     = {On the Role of Neural Collapse in Transfer Learning},
  booktitle = {Proc. 10th International Conference on Learning Representations ({ICLR'22})},
  address   = {Virtual, Earth},
  _month    = apr,
  year      = {2022},
  bibtex    = {http://www.hutter1.net/official/bib.htm#ncollapse},
  http      = {http://openreview.net/forum?id=OOWsE-Mz-ro},
  url       = {http://arxiv.org/abs/2112.15121},
  pdf       = {http://www.hutter1.net/publ/ncollapse.pdf},
  slides    = {http://www.hutter1.net/publ/sncollapse.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#nn},
  keywords  = {transfer learning; neural collapse; foundation models; few-shot learning},
  abstract  = {We study the ability of foundation models to learn
    representations for classification that are transferable to new,
    unseen classes. Recent results in the literature show that
    representations learned by a single classifier over many classes
    are competitive on few-shot learning problems with
    representations learned by special-purpose algorithms designed
    for such problems. In this paper we provide an explanation for
    this behavior based on the recently observed phenomenon that the
    features learned by overparameterized classification networks
    show an interesting clustering property, called neural collapse.
    We demonstrate both theoretically and empirically that neural
    collapse generalizes to new samples from the training classes,
    and -- more importantly -- to new classes as well, allowing
    foundation models to provide feature maps that work well in
    transfer learning and, specifically, in the few-shot setting.},
  znote     = {Acceptance rate: 1095/3391 = 32\%.
    Trended SuperHot in deeplearn.org in Jan'2022},
}
@TechReport{Hutter:22invmdp,
author = "Marcus Hutter and Steven Hansen",
title = "Uniqueness and Complexity of Inverse MDP Models",
institution = "DeepMind",
address = "London",
number = "rh/P2466",
_month = feb,
year = "2022",
bibtex = "http://www.hutter1.net/official/bib.htm#invmdp",
pdf = "http://www.hutter1.net/publ/invmdp.pdf",
slides = "http://www.hutter1.net/publ/sinvmdp.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
keywords = "inverse models; reinforcement learning; causality; theory; multi-step models; planning",
abstract = "What is the action sequence aa'a'' that was likely responsible for reaching state s''' (from state s) in 3 steps?
Addressing such questions is important in causal reasoning and in reinforcement learning.
Inverse ``MDP'' models p(aa'a''|ss''') can be used to answer them.
In the traditional ``forward'' view, transition ``matrix'' p(s'|sa) and policy π(a|s) uniquely determine ``everything'':
the whole dynamics p(as'a's''a''...|s), and with it, the action-conditional state process p(s's''...|saa'a''),
the multi-step inverse models p(aa'a''...|ss^i), etc.
If the latter is our primary concern, a natural question, analogous to the forward case
is to which extent 1-step inverse model p(a|ss') plus policy π(a|s)
determine the multi-step inverse models or even the whole dynamics.
In other words, can forward models be inferred from inverse models or even be side-stepped.
This work addresses this question and variations thereof,
and also whether there are efficient decision/inference algorithms for this.",
}
%-------------Publications-of-Marcus-Hutter-2021--------------%
@Article{Hutter:21clogames,
author = "Mikael Böörs and Tobias Wängberg and Tom Everitt and Marcus Hutter",
title = "Classification by Decomposition: A Novel Approach to Classification of Symmetric 2 x 2 Games",
journal = "Theory and Decision",
xvolume = "TBA",
xnumber = "TBA",
xpages = "TBA 1--46",
publisher = "Springer",
_month = dec,
year = "2021",
note = "Online first December 2021; print version 2022",
bibtex = "http://www.hutter1.net/official/bib.htm#clogames",
xurl = "http://arxiv.org/abs/2112.none",
pdf = "http://www.hutter1.net/publ/clogames.pdf",
project = "http://www.hutter1.net/official/projects.htm#agents",
issn = "1573-7187",
doi = "10.1007/s11238-021-09850-z",
keywords = "Classification; Symmetric games; 2×2 Games; Decomposition; Cooperation and conflict; Simplicity",
abstract = "In this paper, we provide a detailed review of previous
classifications of 2×2 games and suggest a mathematically simple
way to classify the symmetric 2×2 games based on a decomposition
of the payoff matrix into a cooperative and a zero-sum part. We
argue that differences in the interaction between the parts is
what makes games interesting in different ways. Our claim is
supported by evolutionary computer experiments and findings in
previous literature. In addition, we provide a method for using
a stereographic projection to create a compact 2-d
representation of the game space.",
for = "460202(33%),380303(33%),460209(33%)",
}
@techreport{Hutter:21isotuning,
  author      = {Laurent Orseau and Marcus Hutter},
  title       = {Isotuning with Applications to Scale-Free Online Learning},
  institution = {DeepMind},
  address     = {London},
  number      = {http://arxiv.org/abs/2112.14586},
  pages       = {1--32},
  _month      = dec,
  year        = {2021},
  bibtex      = {http://www.hutter1.net/official/bib.htm#isotuning},
  url         = {http://arxiv.org/abs/2112.14586},
  pdf         = {http://www.hutter1.net/publ/isotuning.pdf},
  project     = {http://www.hutter1.net/official/projects.htm#mixed},
  keywords    = {online learning; convex optimization; regret analysis; adaptive learning rate;
    scale-free; anytime; unbounded loss; unbounded domain},
  abstract    = {We extend and combine several tools of the literature to design
    fast, adaptive, anytime and scale-free online learning
    algorithms. Scale-free regret bounds must scale linearly with
    the maximum loss, both toward large losses and toward very small
    losses. Adaptive regret bounds demonstrate that an algorithm can
    take advantage of easy data and potentially have constant
    regret. We seek to develop fast algorithms that depend on as few
    parameters as possible, in particular they should be anytime and
    thus not depend on the time horizon. Our first and main tool,
    isotuning, is a generalization of the idea of balancing the
    trade-off of the regret. We develop a set of tools to design and
    analyze such learning rates easily and show that they adapt
    automatically to the rate of the regret (whether constant,
    O(log T), O(√T), etc.) within a factor 2 of the optimal
    learning rate in hindsight for the same observed quantities. The
    second tool is an online correction, which allows us to obtain
    centered bounds for many algorithms, to prevent the regret
    bounds from being vacuous when the domain is overly large or
    only partially constrained. The last tool, null updates,
    prevents the algorithm from performing overly large updates,
    which could result in unbounded regret, or even invalid updates.
    We develop a general theory using these tools and apply it to
    several standard algorithms. In particular, we (almost entirely)
    restore the adaptivity to small losses of FTRL for unbounded
    domains, design and prove scale-free adaptive guarantees for a
    variant of Mirror Descent (at least when the Bregman divergence
    is convex in its second argument), extend Adapt-ML-Prod to
    scale-free guarantees, and provide several other minor
    contributions about Prod, AdaHedge, BOA and Soft-Bayes.},
  for         = {490304(50%),461199(50%)},
}
@inproceedings{Hutter:21symintel,
  author    = {Samuel Allen Alexander and Marcus Hutter},
  title     = {Reward-Punishment Symmetric Universal Intelligence},
  booktitle = {Proc. 14th Conf. on Artificial General Intelligence ({AGI'21})},
  address   = {San Francisco, USA},
  series    = {LNAI},
  volume    = {13154},
  pages     = {1--10},
  _editor   = {Ben Goertzel and Matthew Iklé and Alexey Potapov},
  publisher = {Springer},
  _month    = oct,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#symintel},
  url       = {http://arxiv.org/abs/2110.02450},
  pdf       = {http://www.hutter1.net/publ/symintel.pdf},
  slides    = {http://www.hutter1.net/publ/ssymintel.pdf},
  video     = {http://youtu.be/CnsqHSCBgX0?t=30250},
  qanda     = {http://youtu.be/CnsqHSCBgX0?t=32165},
  project   = {http://www.hutter1.net/official/projects.htm#uai},
  doi       = {10.1007/978-3-030-93758-4_1},
  issn      = {0302-9743},
  isbn      = {978-3-030-93757-7},
  keywords  = {Universal intelligence; Intelligence measures; Reinforcement learning.},
  abstract  = {Can an agent's intelligence level be negative? We extend the
    Legg-Hutter agent-environment framework to include punishments
    and argue for an affirmative answer to that question. We show
    that if the background encodings and Universal Turing Machine
    (UTM) admit certain Kolmogorov complexity symmetries, then the
    resulting Legg-Hutter intelligence measure is symmetric about
    the origin. In particular, this implies reward-ignoring agents
    have Legg-Hutter intelligence 0 according to such UTMs.},
  for       = {461105(40%),460202(30%),500312(30%)},
}
@TechReport{Hutter:21causalseq,
author = "Pedro A. Ortega and Markus Kunesch and Grégoire Delétang and Tim Genewein and Jordi Grau-Moya and Joel Veness and Jonas Buchli and Jonas Degrave and Bilal Piot and Julien Perolat and Tom Everitt and Corentin Tallec and Emilio Parisotto and Tom Erez and Yutian Chen and Scott Reed and Marcus Hutter and Nando de Freitas and Shane Legg",
title = "Shaking the Foundations: Delusions in Sequence Models for Interaction and Control",
institution = "DeepMind",
address = "London",
number = "http://arxiv.org/abs/2110.10819",
pages = "1--16",
_month = oct,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#causalseq",
url = "http://arxiv.org/abs/2110.10819",
pdf = "http://www.hutter1.net/publ/causalseq.pdf",
project = "http://www.hutter1.net/official/projects.htm#mixed",
keywords = "sequence models; sequential prediction; reinforcement learning; causality; self-delusion",
abstract = "The recent phenomenal success of language models has
reinvigorated machine learning research, and large sequence
models such as transformers are being applied to a variety of
domains. One important problem class that has remained
relatively elusive however is purposeful adaptive behavior.
Currently there is a common perception that sequence models
``lack the understanding of the cause and effect of their
actions'' leading them to draw incorrect inferences due to
auto-suggestive delusions. In this report we explain where this
mismatch originates, and show that it can be resolved by
treating actions as causal interventions. Finally, we show that
in supervised learning, one can teach a system to condition or
intervene on data by training with factual and counterfactual
error signals respectively.",
for = "460202(40%),461199(30%),461103(30%)",
}
@Article{Hutter:21compcon,
author = "Elliot Catt and Marcus Hutter and Joel Veness",
title = "Reinforcement Learning with Information-Theoretic Actuation",
journal = "\href{http://arxiv.org/abs/2109.15147}{arXiv:2109.15147}",
pages = "1--11",
_month = sep,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#compcon",
url = "http://arxiv.org/abs/2109.15147",
pdf = "http://www.hutter1.net/publ/compcon.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
keywords = "Reinforcement Learning; large action spaces; compression; coding; internal actions; sampling.",
abstract = "Reinforcement Learning formalises an embodied agent's
interaction with the environment through observations, rewards
and actions. But where do the actions come from? Actions are
often considered to represent something external, such as the
movement of a limb, a chess piece, or more generally, the output
of an actuator. In this work we explore and formalize a
contrasting view, namely that actions are best thought of as the
output of a sequence of internal choices with respect to an
action model. This view is particularly well-suited for
leveraging the recent advances in large sequence models as prior
knowledge for multi-task reinforcement learning problems. Our
main contribution in this work is to show how to augment the
standard MDP formalism with a sequential notion of internal
action using information-theoretic techniques, and that this
leads to self-consistent definitions of both internal and
external action value functions.",
support = "ARC grant DP150104590",
for = "461105(50%),461301(50%)",
}
% Journal article: MDPI Applied System Innovation 4(2), 2021; DOI 10.3390/asi4020037.
@Article{Hutter:21ai4hum,
author = "Reinhard Hutter and Marcus Hutter",
title = "Chances and Risks of Artificial Intelligence — A Concept of Developing and Exploiting Machine Intelligence for Future Societies",
journal = "Applied System Innovation",
volume = "4",
number = "2",
pages = "1--19",
publisher = "MDPI",
_month = jun,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#ai4hum",
http = "https://www.mdpi.com/2571-5577/4/2/37",
xurl = "http://arxiv.org/abs/2106.none",
pdf = "http://www.hutter1.net/publ/ai4hum.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
issn = "2571-5577",
doi = "10.3390/asi4020037",
keywords = "artificial and human intelligence; security; risks and risk management;
quality of life; common welfare; socio-political assessment",
abstract = "Artificial Intelligence (AI): Boon or Bane for societies? AI
technologies and solutions—as most revolutionary technologies
have done in the past—offer negative implications on the one
hand and considerable positive potential on the other. Avoiding
the former and fostering the latter will require substantial
investments in future societal concepts, research and
development, and control of AI-based solutions in AI security
while avoiding abuse. Preparation for the future role of AI in
societies should strive towards the implementation of related
methods and tools for risk management, models of complementary
human–machine cooperation, strategies for the optimization of
production and administration, and innovative concepts for the
distribution of the economic value created. Two extreme possible
“end states” of AI impact (if there is ever an end state) that
are being discussed at present may manifest as (a) uncontrolled
substitution by AI of major aspects of production, services, and
administrative and decision-making processes, leading to
unprecedented risks such as high unemployment, and devaluation
and the underpayment of people in paid work, resulting in
inequality in the distribution of wealth and employment,
diminishing social peace, social cohesion, solidarity, security,
etc., or, on the contrary, (b) the freeing of people from
routine labor through increased automation in production,
administration and services, and changing the constitution of
politics and societies into constituencies with high ethical
standards, personal self-determination, and the general
dominance of humane principles, as opposed to pure materialism.
Any mix of these two extremes could develop, and these
combinations may vary among different societies and political
systems.",
for = "460299(25%),440710(25%),440711(25%),441004(25%)",
}
@article{Hutter:21alignx,
  author    = {Tom Everitt and Marcus Hutter and Ramana Kumar and Victoria Krakovna},
  title     = {Reward Tampering Problems and Solutions in Reinforcement Learning: A Causal Influence Diagram Perspective},
  journal   = {Synthese},
  xvolume   = {??},
  xnumber   = {??},
  xpages    = {??-??},
  publisher = {Springer},
  _month    = may,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#alignx},
  url       = {http://arxiv.org/abs/1908.04734},
  pdf       = {http://www.hutter1.net/publ/alignx.pdf},
  slides    = {http://www.hutter1.net/publ/salign.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#safe},
  code      = {http://www.hutter1.net/publ/align.cpp},
  issn      = {0039-7857},
  doi       = {10.1007/s11229-021-03141-4},
  keywords  = {AI safety, reinforcement learning, Bayesian learning, causal graphs},
  abstract  = {Can humans get arbitrarily capable reinforcement learning (RL)
    agents to do their bidding? Or will sufficiently capable RL
    agents always find ways to bypass their intended objectives by
    shortcutting their reward signal? This question impacts how far
    RL can be scaled, and whether alternative paradigms must be
    developed in order to build safe artificial general
    intelligence. In this paper, we study when an RL agent has an
    instrumental goal to tamper with its reward process, and
    describe design principles that prevent instrumental goals for
    two different types of reward tampering (reward function
    tampering and RF-input tampering). Combined, the design
    principles can prevent both types of reward tampering from being
    instrumental goals. The analysis benefits from causal influence
    diagrams to provide intuitive yet precise formalizations.},
  support   = {ARC grant DP150104590},
  for       = {461105(33%),460202(33%),500306(33%)},
}
@Article{Hutter:21ccamfrl,
author = "Thomas Mesnard and Théophane Weber and Fabio Viola and Shantanu Thakoor and Alaa Saade and Anna Harutyunyan and Will Dabney and Tom Stepleton and Nicolas Heess and Arthur Guez and Marcus Hutter and Lars Buesing and Rémi Munos",
title = "Counterfactual Credit Assignment in Model-Free Reinforcement Learning",
journal = "Journal of Machine Learning Research, W\&CP: ICML",
volume = "139",
pages = "7654--7664",
_editor = "Marina Meila and Tong Zhang",
_month = jul,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#ccamfrl",
url = "http://arxiv.org/abs/2011.09464",
pdf = "http://www.hutter1.net/publ/ccamfrl.pdf",
slides = "https://icml.cc/virtual/2021/poster/9795",
project = "http://www.hutter1.net/official/projects.htm#rl",
keywords = "Reinforcement Learning; Planning; Deep RL",
abstract = "Credit assignment in reinforcement learning is the problem of
measuring an action’s influence on future rewards. In
particular, this requires separating skill from luck, i.e.
disentangling the effect of an action on rewards from that of
external factors and subsequent actions. To achieve this, we
adapt the notion of counterfactuals from causality theory to a
model-free RL setup. The key idea is to condition value
functions on future events, by learning to extract relevant
information from a trajectory. We formulate a family of policy
gradient algorithms that use these future-conditional value
functions as baselines or critics, and show that they are
provably low variance. To avoid the potential bias from
conditioning on future information, we constrain the hindsight
information to not contain information about the agent's
actions. We demonstrate the efficacy and validity of our
algorithm on a number of illustrative and challenging problems.",
for = "461105(100%)",
znote = "Acceptance rate: 1184/5513 = 21\%",
}
% Journal article: IEEE JSAIT 2(2):665--677, 2021; DOI 10.1109/JSAIT.2021.3079722; arXiv:2006.03357.
@Article{Hutter:21ckillcat,
author = "Michael K. Cohen and Marcus Hutter and Elliot Catt",
title = "Curiosity Killed or Incapacitated the Cat and the Asymptotically Optimal Agent",
journal = "IEEE Journal on Selected Areas in Information Theory",
volume = "2",
number = "2",
pages = "665--677",
publisher = "IEEE",
_month = may,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#ckillcat",
url = "http://arxiv.org/abs/2006.03357",
pdf = "http://www.hutter1.net/publ/ckillcat.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
issn = "2641-8770",
doi = "10.1109/JSAIT.2021.3079722",
keywords = "Artificial intelligence; learning; autonomous agents;
Bayes methods; information theory; inference algorithms;
history; reinforcement learning; Markov processes",
abstract = "Reinforcement learners are agents that learn to pick actions
that lead to high reward. Ideally, the value of a reinforcement
learner’s policy approaches optimality—where the optimal
informed policy is the one which maximizes reward.
Unfortunately, we show that if an agent is guaranteed to be
“asymptotically optimal” in any (stochastically computable)
environment, then subject to an assumption about the true
environment, this agent will be either “destroyed” or
“incapacitated” with probability 1. Much work in reinforcement
learning uses an ergodicity assumption to avoid this problem.
Often, doing theoretical research under simplifying assumptions
prepares us to provide practical solutions even in the absence
of those assumptions, but the ergodicity assumption in
reinforcement learning may have led us entirely astray in
preparing safe and effective exploration strategies for agents
in dangerous environments. Rather than assuming away the
problem, we present an agent, Mentee, with the modest guarantee
of approaching the performance of a mentor, doing safe
exploration instead of reckless exploration. Critically,
Mentee’s exploration probability depends on the expected
information gain from exploring. In a simple non-ergodic
environment with a weak mentor, we find Mentee outperforms
existing asymptotically optimal agents and its mentor.",
support = "ARC grant DP150104590",
for = "460202(33%),460209(33%),461105(33%)",
}
@patent{Hutter:21glcbpatent,
  author          = {Eren Sezener and Joel Veness and Marcus Hutter and Jianan Wang and David Budden},
  title           = {Gated Linear Contextual Bandits},
  _month          = apr,
  year            = {2021},
  number          = {WO2021069574A1},
  appl_number     = {PCT/EP2020/078259},
  _another_number = {45288-0091 WO1},
  journal         = {DeepMind},
  url             = {https://worldwide.espacenet.com/patent/search?q=pn%3DWO2021069574A1},
  pdf             = {http://www.hutter1.net/publ/glcbpatent.pdf},
  type            = {patent},
  abstract        = {Methods, systems, and apparatus, including computer programs
    encoded on computer storage media, for selecting actions in
    response to each context in a sequence of context inputs. One of
    the methods includes maintaining data specifying a respective
    gated linear network corresponding to each of the plurality of
    actions; for each context in the sequence of contexts: for each
    action, processing the context using the gated linear network
    corresponding to the action to generate a predicted probability;
    for each action, generating an action score for the action from
    at least the predicted probability; and selecting the action to
    be performed in response to the context based on the action
    scores.},
  for             = {461104(100%)},
}
% Journal article: IEEE JSAIT 2(2):678--690, 2021; DOI 10.1109/JSAIT.2021.3073844; arXiv:2105.06268.
@Article{Hutter:21bomaix,
author = "Michael K. Cohen and Badri Vellambi and Marcus Hutter",
title = "Intelligence and Unambitiousness Using Algorithmic Information Theory",
journal = "IEEE Journal on Selected Areas in Information Theory",
volume = "2",
number = "2",
pages = "678--690",
publisher = "IEEE",
_month = apr,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#bomaix",
url = "http://arxiv.org/abs/2105.06268",
pdf = "http://www.hutter1.net/publ/bomaix.pdf",
slides = "http://www.hutter1.net/publ/sbomai.pdf",
poster = "http://www.hutter1.net/publ/pbomai.pdf",
press = "http://medium.com/analytics-vidhya/paper-summary-asymptotically-unambitious-artificial-general-intelligence-cohen-et-al-a5d091d501db",
project = "http://www.hutter1.net/official/projects.htm#safe",
issn = "2641-8770",
doi = "10.1109/JSAIT.2021.3073844",
keywords = "information theory; task analysis; computational modeling; history;
schedules; Bayes methods; artificial general intelligence;
existental threat; alignment problem; power; instrumental goal;
reinforcement learning; inference algorithms; autonomous agents; learning",
abstract = "Algorithmic Information Theory has inspired intractable
constructions of general intelligence (AGI), and undiscovered
tractable approximations are likely feasible. Reinforcement
Learning (RL), the dominant paradigm by which an agent might
learn to solve arbitrary solvable problems, gives an agent a
dangerous incentive: to gain arbitrary “power” in order to
intervene in the provision of their own reward. We review the
arguments that generally intelligent
algorithmic-informationtheoretic reinforcement learners such as
Hutter’s 2 AIXI would seek arbitrary power, including over us.
Then, using an information-theoretic exploration schedule, and a
setup inspired by causal influence theory, we present a variant
of AIXI which learns to not seek arbitrary power; we call it
“unambitious”. We show that our agent learns to accrue reward at
least as well as a human mentor, while relying on that mentor
with diminishing probability. And given a formal assumption that
we probe empirically, we show that eventually, the agent’s
worldmodel incorporates the following true fact: intervening in
the “outside world” will have no effect on reward acquisition;
hence, it has no incentive to shape the outside world.",
support = "ARC grant DP150104590",
for = "460202(33%),460209(33%),461105(33%)",
}
@TechReport{Hutter:21dgn,
author = "Eren Sezener and Agnieszka Grabska-Barwi{\'n}ska and Dimitar Kostadinov and Maxime Beau and Sanjukta Krishnagopal and David Budden and Marcus Hutter and Joel Veness and Matthew Botvinick and Claudia Clopath and Michael H{\"a}usser and Peter E. Latham",
title = "A Rapid and Efficient Learning Rule for Biological Neural Circuits",
institution = "DeepMind",
address = "London, UK",
_month = mar,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#dgn",
http = "https://www.biorxiv.org/content/10.1101/2021.03.10.434756",
pdf = "http://www.hutter1.net/publ/dgn.pdf",
slides = "http://www.hutter1.net/publ/sdgn.pdf",
video = "http://youtu.be/g1F0F8JVmGs",
project = "http://www.hutter1.net/official/projects.htm#nn",
code = "http://github.com/deepmind/deepmind-research/tree/master/gated_linear_networks/colabs",
doi = "10.1101/2021.03.10.434756",
abstract = "The dominant view in neuroscience is that changes in synaptic
weights underlie learning. It is unclear, however, how the brain
is able to determine which synapses should change, and by how
much. This uncertainty stands in sharp contrast to deep
learning, where changes in weights are explicitly engineered to
optimize performance. However, the main tool for doing that,
backpropagation, is not biologically plausible, and networks
trained with this rule tend to forget old tasks when learning
new ones. Here we introduce the Dendritic Gated Network (DGN), a
variant of the Gated Linear Network [1, 2], which offers a
biologically plausible alternative to backpropagation. DGNs
combine dendritic ``gating'' (whereby interneurons target
dendrites to shape neuronal response) with local learning rules
to yield provably efficient performance. They are significantly
more data efficient than conventional artificial networks and
are highly resistant to forgetting, and we show that they
perform well on a variety of tasks, in some cases better than
backpropagation. The DGN bears similarities to the cerebellum,
where there is evidence for shaping of Purkinje cell responses
by interneurons. It also makes several experimental predictions,
one of which we validate with in vivo cerebellar imaging of mice
performing a motor task.",
for = "461104(40%),520202(30%),520203(30%)",
}
@TechReport{Hutter:21scaling,
author = "Marcus Hutter",
title = "Learning Curve Theory",
institution = "DeepMind",
address = "London",
number = "http://arxiv.org/abs/2102.04074",
_month = feb,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#scaling",
url = "http://arxiv.org/abs/2102.04074",
pdf = "http://www.hutter1.net/publ/scaling.pdf",
slides = "http://www.hutter1.net/publ/sscaling.pdf",
video1 = "http://youtu.be/q5YhJ8QDkMQ",
video2 = "http://media.mis.mpg.de/mml/2021-03-04",
project = "http://www.hutter1.net/official/projects.htm#mixed",
keywords = "Power Law, Scaling, Learning Curve, Theory, Data Size, Error, Loss, Zipf",
abstract = "Recently a number of empirical ``universal'' scaling law papers
have been published, most notably by OpenAI. `Scaling laws'
refers to power-law decreases of training or test error w.r.t.\
more data, larger neural networks, and/or more compute. In this
work we focus on scaling w.r.t.\ data size $n$. Theoretical
understanding of this phenomenon is in its infancy, except in
finite-dimensional models for which error typically decreases
with $n^{-1/2}$ or $n^{-1}$, where $n$ is the sample size. We
develop and theoretically analyse the simplest possible (toy)
model that can exhibit $n^{-\beta}$ learning curves for arbitrary
power $\beta>0$, and determine to which extent power laws are
universal or depend on the data distribution or loss function:
Roughly, learning curves exhibit a power law with
$\beta=\frac{\alpha}{1+\alpha}$ for Zipf-distributed data with exponent $1+\alpha$,
independent of the choice of loss. Furthermore, noise rapidly
deteriorates/improves in instantaneous/time-averaged learning
curves for increasing $n$, suggesting that model selection
should better be based on cumulative (AUC) or time-averaged error,
not final test error.",
for = "461199(70%),461103(30%)",
}
@InProceedings{Hutter:21binesa,
author = "Sultan Javed Majeed and Marcus Hutter",
title = "Exact Reduction of Huge Action Spaces in General Reinforcement Learning",
booktitle = "Proc. 35th {AAAI} Conference on Artificial Intelligence ({AAAI'21})",
address = "Virtual, Earth",
volume = "35",
publisher = "AAAI Press",
_month = feb,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#binesa",
url = "http://arxiv.org/abs/2012.10200",
pdf = "http://www.hutter1.net/publ/binesa.pdf",
slides = "http://www.hutter1.net/publ/sbinesa.pdf",
poster = "http://www.hutter1.net/publ/pbinesa.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
keywords = "reinforcement learning; extreme state aggregation; action binarization; non-Markov",
abstract = "The reinforcement learning (RL) framework formalizes the notion
of learning with interactions. Many real-world problems have
large state-spaces and/or action-spaces such as in Go, StarCraft,
protein folding, and robotics or are non-Markovian, which cause
significant challenges to RL algorithms. In this work we address
the large action-space problem by sequentializing actions, which
can reduce the action-space size significantly, even down to two
actions at the expense of an increased planning horizon. We
provide explicit and exact constructions and equivalence proofs
for all quantities of interest for arbitrary history-based
processes. In the case of MDPs, this could help RL algorithms
that bootstrap. In this work we show how action-binarization in
the non-MDP case can significantly improve Extreme State
Aggregation (ESA) bounds. ESA allows casting any (non-MDP,
non-ergodic, history-based) RL problem into a fixed-sized
non-Markovian state-space with the help of a surrogate Markovian
process. On the upside, ESA enjoys similar optimality guarantees
as Markovian models do. But a downside is that the size of the
aggregated state-space becomes exponential in the size of the
action-space. In this work, we patch this issue by binarizing
the action-space. We provide an upper bound on the number of
states of this binarized ESA that is logarithmic in the original
action-space size, a double-exponential improvement.",
support = "ARC grant DP150104590",
for = "461105(100%)",
znote = "Acceptance rate: 1692/7911=21\%",
}
@InProceedings{Hutter:21shortgln,
author = "Joel Veness and Tor Lattimore and David Budden and Avishkar Bhoopchand and Christopher Mattern and Agnieszka Grabska-Barwinska and Eren Sezener and Jianan Wang and Peter Toth and Simon Schmitt and Marcus Hutter",
title = "Gated Linear Networks",
booktitle = "Proc. 35th {AAAI} Conference on Artificial Intelligence ({AAAI'21})",
address = "Virtual, Earth",
volume = "35",
publisher = "AAAI Press",
_month = feb,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#shortgln",
url = "http://arxiv.org/abs/1910.01526",
pdf = "http://www.hutter1.net/publ/shortgln.pdf",
slides = "http://www.hutter1.net/publ/sshortgln.pdf",
poster = "http://www.hutter1.net/publ/pshortgln.pdf",
project = "http://www.hutter1.net/official/projects.htm#nn",
press = "http://www.reddit.com/r/MachineLearning/comments/hx0q69/r_deepminds_gated_linear_networks_paper_and_code/",
code = "http://github.com/aiwabdn/pygln",
keywords = "gating; linear; network; geometric mixing; capacity; backpropagation-free;
online convex optimization; resilience to catastrophic forgetting;
density estimation; empirical evaluation; UCI; MNIST",
abstract = "This paper presents a new family of backpropagation-free neural
architectures, Gated Linear Networks (GLNs). What distinguishes
GLNs from contemporary neural networks is the distributed and
local nature of their credit assignment mechanism; each neuron
directly predicts the target, forgoing the ability to learn
feature representations in favor of rapid online learning.
Individual neurons can model nonlinear functions via the use of
data-dependent gating in conjunction with online convex
optimization. We show that this architecture gives rise to
universal learning capabilities in the limit, with effective
model capacity increasing as a function of network size in a
manner comparable with deep ReLU networks. Furthermore, we
demonstrate that the GLN learning mechanism possesses
extraordinary resilience to catastrophic forgetting, performing
comparably to a MLP with dropout and Elastic Weight
Consolidation on standard benchmarks. These desirable
theoretical and empirical properties position GLNs as a
complementary technique to contemporary offline deep learning
methods.",
for = "461104(100%)",
znote = "Acceptance rate: 1692/7911=21\%",
}
@InProceedings{Hutter:21outman,
author = "Len Du and Marcus Hutter",
title = "How Useful are Hand-crafted Data? Making Cases for Anomaly Detection Methods",
booktitle = "54th Hawaii International Conference on System Sciences ({HICSS'21})",
address = "Maui, Hawaii, USA",
volume = "54",
pages = "847--856",
publisher = "ScholarSpace",
_month = jan,
year = "2021",
bibtex = "http://www.hutter1.net/official/bib.htm#outman",
http = "http://hdl.handle.net/10125/70716",
pdf = "http://www.hutter1.net/publ/outman.pdf",
slides = "http://www.hutter1.net/publ/soutman.pdf",
project = "http://www.hutter1.net/official/projects.htm#mixed",
isbn = "978-0-9981331-4-0",
keywords = "accountability; evaluation; obscurity of AI algorithms;
anomaly detection; evaluation; explainability; small data; testing AI",
abstract = "While the importance of small data has been admitted in
principle, they have not been widely adopted as a necessity in
current machine learning or data mining research. Most
predominantly, machine learning methods were typically evaluated
under a “bigger is better” presumption. The more (and the more
complex) data we could pour at a method, the better we thought
we were at estimating its performance. We deem this mindset
detrimental to interpretability, explainability, and the
sustained development of the field. For example, despite that
new outlier detection methods were often inspired by small, low
dimensional samples, their performance has been exclusively
evaluated by large, high-dimensional datasets resembling
real-world use cases. With these “big data” we miss the chance
to gain insights from close looks at how exactly the algorithms
perform, as we mere humans cannot really comprehend the samples.
In this work, we explore in the exactly opposite direction. We
run several classical anomaly detection methods against small,
mindfully crafted cases on which the results can be examined in
detail. In addition to better understanding of these classical
algorithms, our exploration has actually led to the discovery of
some novel uses of classical anomaly detection methods to our
surprise.",
for = "460502(100%)",
note = "Nominated for best paper award: http://hicss.hawaii.edu/best-papers/",
znote = "Acceptance rate: 710/1449=49\%",
}
%-------------Publications-of-Marcus-Hutter-2020--------------%
@InProceedings{Hutter:20nnprune,
author = "Laurent Orseau and Marcus Hutter and Omar Rivasplata",
title = "Logarithmic Pruning is All You Need",
booktitle = "Advances in Neural Information Processing Systems ({NeurIPS'20})",
volume = "33",
pages = "2925--2934",
_editor = "H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin",
publisher = "Curran Associates",
address = "Cambridge, MA, USA",
_month = dec,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#nnprune",
http = "http://papers.nips.cc/paper/2020/hash/1e9491470749d5b0e361ce4f0b24d037-Abstract.html",
url = "http://arxiv.org/abs/2006.12156",
pdf = "http://www.hutter1.net/publ/nnprune.pdf",
poster = "http://www.hutter1.net/publ/pnnprune.pdf",
video = "http://nips.cc/virtual/2020/public/poster_1e9491470749d5b0e361ce4f0b24d037.html",
project = "http://www.hutter1.net/official/projects.htm#nn",
keywords = "subnetwork; lottery ticket hypothesis; pruning; mixture of weights",
abstract = "The Lottery Ticket Hypothesis is a conjecture that every large
neural network contains a subnetwork that, when trained in
isolation, achieves comparable performance to the large network.
An even stronger conjecture has been proven recently: Every
sufficiently overparameterized network contains a subnetwork
that, even without training, achieves comparable accuracy to the
trained large network. This theorem, however, relies on a number
of strong assumptions and provides a loose polynomial factor on
the size of the large network compared to the target function.
In this work, we remove the most limiting assumptions of this
previous work while providing significantly tighter bounds: the
overparameterized network only needs to be a logarithmic factor
in the accuracy larger than the target subnetwork.",
for = "461104(100%)",
znote = "Acceptance rate: 1900/9454=20\%. Spotlight: 280/9454=3\%",
}
@InProceedings{Hutter:20:nctlfmn,
author = "Jianan Wang and Eren Sezener and David Budden and Marcus Hutter and Joel Veness",
title = "A Combinatorial Perspective on Transfer Learning",
booktitle = "Advances in Neural Information Processing Systems ({NeurIPS'20})",
volume = "33",
pages = "918--929",
_editor = "H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin",
publisher = "Curran Associates",
address = "Cambridge, MA, USA",
_month = dec,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#nctlfmn",
http = "http://papers.nips.cc/paper/2020/hash/0a3b6f64f0523984e51323fe53b8c504-Abstract.html",
url = "http://arxiv.org/abs/2010.12268",
pdf = "http://www.hutter1.net/publ/nctlfmn.pdf",
poster = "http://www.hutter1.net/publ/pnctlfmn.pdf",
video = "http://nips.cc/virtual/2020/public/poster_0a3b6f64f0523984e51323fe53b8c504.html",
project = "http://www.hutter1.net/official/projects.htm#nn",
code = "http://github.com/aiwabdn/pygln",
keywords = "gating; linear; network; transfer learning; online convex optimization;
geometric mixing; resilience to catastrophic forgetting;
forget me not process; empirical evaluation; MNIST",
abstract = "Human intelligence is characterized not only by the capacity to
learn complex skills, but the ability to rapidly adapt and
acquire new skills within an ever-changing environment. In this
work we study how the learning of modular solutions can allow
for effective generalization to both unseen and potentially
differently distributed data. Our main postulate is that the
combination of task segmentation, modular learning and
memory-based ensembling can give rise to generalization on an
exponentially growing number of unseen tasks. We provide a
concrete instantiation of this idea using a combination of: (1)
the Forget-Me-Not Process, for task segmentation and memory
based ensembling; and (2) Gated Linear Networks, which in
contrast to contemporary deep learning techniques use a modular
and local learning mechanism. We demonstrate that this system
exhibits a number of desirable continual learning properties:
robustness to catastrophic forgetting, no negative transfer and
increasing levels of positive transfer as more tasks are seen.
We show competitive performance against both offline and online
methods on standard continual learning benchmarks.",
for = "461104(100%)",
znote = "Acceptance rate: 1900/9454=20\%",
}
@InProceedings{Hutter:20banditgln,
author = "Eren Sezener and Marcus Hutter and David Budden and Jianan Wang and Joel Veness",
title = "Online Learning in Contextual Bandits using Gated Linear Networks",
booktitle = "Advances in Neural Information Processing Systems ({NeurIPS'20})",
volume = "33",
pages = "19467--19477",
_editor = "H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin",
publisher = "Curran Associates",
address = "Cambridge, MA, USA",
_month = dec,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#banditgln",
http = "http://papers.nips.cc/paper/2020/hash/e287f0b2e730059c55d97fa92649f4f2-Abstract.html",
url = "http://arxiv.org/abs/2002.11611",
pdf = "http://www.hutter1.net/publ/banditgln.pdf",
poster = "http://www.hutter1.net/publ/pbanditgln.pdf",
video = "http://nips.cc/virtual/2020/public/poster_e287f0b2e730059c55d97fa92649f4f2.html",
project = "http://www.hutter1.net/official/projects.htm#nn",
code = "http://github.com/aiwabdn/pygln",
keywords = "gating; linear; network; contextual bandits; online convex optimization; geometric mixing",
abstract = "We introduce a new and completely online contextual bandit
algorithm called Gated Linear Contextual Bandits (GLCB). This
algorithm is based on Gated Linear Networks (GLNs), a recently
introduced deep learning architecture with properties
well-suited to the online setting. Leveraging data-dependent
gating properties of the GLN we are able to estimate prediction
uncertainty with effectively zero algorithmic overhead. We
empirically evaluate GLCB compared to 9 state-of-the-art
algorithms that leverage deep neural networks, on a standard
benchmark suite of discrete and continuous contextual bandit
problems. GLCB obtains mean first-place despite being the only
online method, and we further support these results with a
theoretical study of its convergence properties.",
for = "461104(100%)",
znote = "Acceptance rate: 1900/9454=20\%",
}
@Misc{Hutter:20gpt3agi,
author = "Marcus Hutter",
title = "GPT-3 and AGI",
howpublished = "Trusted Autonomous Systems",
_month = aug,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#gpt3agi",
http = "http://www.eventbrite.com.au/e/a-discussion-on-gpt-3-and-artificial-general-intelligence-tickets-116673544713",
slides = "http://www.hutter1.net/publ/sgpt3agi.pdf",
video = "http://youtu.be/E25Uk8WpYQE",
project = "http://www.hutter1.net/official/projects.htm#agi",
discussion = "http://youtu.be/aDFLp4A1EmY",
keywords = "Deep Learning; Generative Pre-Trained Transformer; Inner Working;
Artificial General Intelligence; Achievements; Limitations; Philosophy; Outlook",
abstract = "GPT-3 stands for Generative Pre-trained Transformer 3.
It is a gargantuan artificial Neural Network (NN) around the size of a
mouse brain, trained on essentially the whole internet and millions of
books. GPT-3 has demonstrated impressive performance on a wide
range of language tasks. Most discussions focus on GPT-3's
performance. In this talk I will give a glimpse of how GPT-3 actually
works, and ask and tentatively answer the question of whether it is a
step towards creating Artificial General Intelligence (AGI). The talk has
been given as a primer in a panel discussion on this topic.",
for = "461103(33%),460208(33%),460202(33%)",
}
@InProceedings{Hutter:20aixipess,
author = "Michael Cohen and Marcus Hutter",
title = "Pessimism About Unknown Unknowns Inspires Conservatism",
booktitle = "33rd Conference on Learning Theory ({COLT'20})",
address = "Virtual / Graz, Austria",
volume = "125",
series = "Proceedings of Machine Learning Research",
pages = "1344--1373",
_editor = "Jacob Abernethy and Shivani Agarwal",
publisher = "PMLR",
_month = jul,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#aixipess",
http = "http://proceedings.mlr.press/v125/cohen20a.html",
url = "http://arxiv.org/abs/2006.08753",
pdf = "http://www.hutter1.net/publ/aixipess.pdf",
slides = "http://www.hutter1.net/publ/saixipess.pdf",
video = "http://www.colt2020.org/virtual/papers/paper_221.html",
project = "http://www.hutter1.net/official/projects.htm#safe",
issn = "1532-4435",
keywords = "",
abstract = "If we could define the set of all bad outcomes, we could
hard-code an agent which avoids them; however, in sufficiently
complex environments, this is infeasible. We do not know of any
general-purpose approaches in the literature to avoiding novel
failure modes. Motivated by this, we define an idealized
Bayesian reinforcement learner which follows a policy that
maximizes the worst-case expected reward over a set of
world-models. We call this agent pessimistic, since it optimizes
assuming the worst case. A scalar parameter tunes the agent's
pessimism by changing the size of the set of world-models taken
into account. Our first main contribution is: given an
assumption about the agent's model class, a sufficiently
pessimistic agent does not cause ``unprecedented events'' with
probability $1-\delta$, whether or not designers know how to
precisely specify those precedents they are concerned with.
Since pessimism discourages exploration, at each timestep, the
agent may defer to a mentor, who may be a human or some
known-safe policy we would like to improve. Our other main
contribution is that the agent's policy's value approaches at
least that of the mentor, while the probability of deferring to
the mentor goes to 0. In high-stakes environments, we might like
advanced artificial agents to pursue goals cautiously, which is
a non-trivial problem even if the agent were allowed arbitrary
computing power; we present a formal solution.",
support = "ARC grant DP150104590",
for = "460202(33%),460209(33%),461105(33%)",
znote = "Acceptance rate: 119/388 = 31\%",
}
@TechReport{Hutter:20asymnn,
author = "Marcus Hutter",
title = "On Representing (Anti)Symmetric Functions",
institution = "DeepMind",
address = "London, UK",
number = "arXiv:2007.15298",
_month = jun,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#asymnn",
url = "http://arxiv.org/abs/2007.15298",
pdf = "http://www.hutter1.net/publ/asymnn.pdf",
project = "http://www.hutter1.net/official/projects.htm#nn",
keywords = "Neural network, approximation, universality, Slater determinant, Vandermonde
matrix, equivariance, symmetry, anti-symmetry, symmetric polynomials, polarized
basis, multilayer perceptron, continuity, smoothness",
abstract = "Permutation-invariant, -equivariant, and -covariant functions
and anti-symmetric functions are important in quantum physics,
computer vision, and other disciplines. Applications often
require most or all of the following properties: (a) a large
class of such functions can be approximated, e.g. all continuous
function, (b) only the (anti)symmetric functions can be
represented, (c) a fast algorithm for computing the
approximation, (d) the representation itself is continuous or
differentiable, (e) the architecture is suitable for learning
the function from data. (Anti)symmetric neural networks have
recently been developed and applied with great success. A few
theoretical approximation results have been proven, but many
questions are still open, especially for particles in more than
one dimension and the anti-symmetric case, which this work
focusses on. More concretely, we derive natural polynomial
approximations in the symmetric case, and approximations based
on a single generalized Slater determinant in the anti-symmetric
case. Unlike some previous super-exponential and discontinuous
approximations, these seem a more promising basis for future
tighter bounds. We provide a complete and explicit universality
proof of the Equivariant MultiLayer Perceptron, which implies
universality of symmetric MLPs and the FermiNet.",
for = "461104(50%),510899(50%)",
}
@TechReport{Hutter:20qcsol,
author = "Elliot Catt and Marcus Hutter",
title = "A Gentle Introduction to Quantum Computing Algorithms with Applications to Universal Prediction",
institution = "Australian National University",
address = "Canberra, Australia",
number = "arXiv:2005.03137",
_month = may,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#qcsol",
url = "http://arxiv.org/abs/2005.03137",
pdf = "http://www.hutter1.net/publ/qcsol.pdf",
slides = "http://www.hutter1.net/publ/sqcsol.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
abstract = "In this technical report we give an elementary introduction to
Quantum Computing for non-physicists. In this introduction we
describe in detail some of the foundational Quantum Algorithms
including: the Deutsch-Jozsa Algorithm, Shor's Algorithm, Grover
Search, and Quantum Counting Algorithm and briefly the
Harrow-Lloyd Algorithm. Additionally we give an introduction to
Solomonoff Induction, a theoretically optimal method for
prediction. We then attempt to use Quantum computing to find
better algorithms for the approximation of Solomonoff Induction.
This is done by using techniques from other Quantum computing
algorithms to achieve a speedup in computing the speed prior,
which is an approximation of Solomonoff's prior, a key part of
Solomonoff Induction. The major limiting factors are that the
probabilities being computed are often so small that without a
sufficient (often large) amount of trials, the error may be
larger than the result. If a substantial speedup in the
computation of an approximation of Solomonoff Induction can be
achieved through quantum computing, then this can be applied to
the field of intelligent agents as a key part of an
approximation of the agent AIXI.",
for = "461307(40%),461105(30%),460299(30%)",
}
@InProceedings{Hutter:20bomai,
author = "Michael Cohen and Badri Vellambi and Marcus Hutter",
title = "Asymptotically Unambitious Artificial General Intelligence",
booktitle = "Proc. 34rd {AAAI} Conference on Artificial Intelligence ({AAAI'20})",
address = "New York, USA",
_editor = "F. Rossi and V. Conitzer and F. Sha",
volume = "34",
number = "3",
pages = "2467--2476",
publisher = "AAAI Press",
_month = feb,
year = "2020",
bibtex = "http://www.hutter1.net/official/bib.htm#bomai",
url = "http://arxiv.org/abs/1905.12186",
pdf = "http://www.hutter1.net/publ/bomai.pdf",
slides = "http://www.hutter1.net/publ/sbomai.pdf",
poster = "http://www.hutter1.net/publ/pbomai.pdf",
press = "http://medium.com/analytics-vidhya/paper-summary-asymptotically-unambitious-artificial-general-intelligence-cohen-et-al-a5d091d501db",
project = "http://www.hutter1.net/official/projects.htm#safe",
issn = "2159-5399",
isbn = "978-1-57735-835-0",
doi = "10.1609/aaai.v34i03.5628",
keywords = "artificial general intelligence; history; schedules; Bayes methods;
existential threat; alignment problem; power; instrumental goal; reinforcement learning.",
abstract = "General intelligence, the ability to solve arbitrary solvable
problems, is supposed by many to be artificially constructible.
Narrow intelligence, the ability to solve a given particularly
difficult problem, has seen impressive recent development.
Notable examples include self-driving cars, Go engines, image
classifiers, and translators. Artificial General Intelligence
(AGI) presents dangers that narrow intelligence does not: if
something smarter than us across every domain were indifferent
to our concerns, it would be an existential threat to
humanity, just as we threaten many species despite no ill will.
Even the theory of how to maintain the alignment of an AGI's
goals with our own has proven highly elusive. We present the
first algorithm we are aware of for asymptotically unambitious
AGI, where ``unambitiousness'' includes not seeking arbitrary
power. Thus, we identify an exception to the Instrumental
Convergence Thesis, which is roughly that by default, an AGI
would seek power, including over us.
support = "ARC grant DP150104590",
for = "460202(33%),460209(33%),461105(33%)",
znote = "Acceptance rate: 1591/7737=21\%",
}
%-------------Publications-of-Marcus-Hutter-2019--------------%
@Article{Hutter:19aligns,
author = "Tom Everitt and Ramana Kumar and Marcus Hutter",
title = "Designing Agent Incentives to Avoid Reward Tampering",
journal = "Medium",
volume = "8",
number = "14",
_month = aug,
year = "2019",
bibtex = "http://www.hutter1.net/official/bib.htm#aligns",
url = "http://medium.com/@deepmindsafetyresearch/designing-agent-incentives-to-avoid-reward-tampering-4380c1bb6cd",
pdf = "http://www.hutter1.net/publ/aligns.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
keywords = "AI safety, reinforcement learning, Bayesian learning, causal graphs",
abstract = "From an AI safety perspective, having a clear design principle
and a crisp characterization of what problem it solves means
that we don’t have to guess which agents are safe. In this post
and paper we describe how a design principle called current-RF
optimization avoids the reward function tampering problem.",
for = "080101(60%),220312(20%),080198(20%)",
seo = "970108(80%),970117(20%)",
}
@InProceedings{Hutter:19rlwlinfa,
author = "Marcus Hutter and Samuel Yang-Zhao and Sultan Javed Majeed",
title = "Conditions on Features for Temporal Difference-Like Methods to Converge",
booktitle = "Proc. 28th International Joint Conf. on Artificial Intelligence ({IJCAI'19})",
address = "Macao, China",
_editor = "Sarit Kraus",
_publisher = "IJCAI",
pages = "2570--2577",
_month = aug,
year = "2019",
bibtex = "http://www.hutter1.net/official/bib.htm#rlwlinfa",
url = "http://arxiv.org/abs/1905.11702",
pdf = "http://www.hutter1.net/publ/rlwlinfa.pdf",
slides = "http://www.hutter1.net/publ/srlwlinfa.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
isbn = "978-0-9992411-4-1",
doi = "10.24963/ijcai.2019/357",
keywords = "reinforcement learning; temporal difference learning; Bellman equation;
unique solution; linear function approximation; convergence;
negative result; wrong solution; natural algorithm",
abstract = "The convergence of many reinforcement learning (RL) algorithms
with linear function approximation has been investigated
extensively but most proofs assume that these methods converge
to a unique solution. In this paper, we provide a complete
characterization of non-uniqueness issues for a large class of
reinforcement learning algorithms, simultaneously unifying many
counter-examples to convergence in a theoretical framework. We
achieve this by proving a new condition on features that can
determine whether the convergence assumptions are valid or
non-uniqueness holds. We consider a general class of RL methods,
which we call natural algorithms, whose solutions are
characterized as the fixed point of a projected Bellman
equation. Our main result proves that natural algorithms
converge to the correct solution if and only if all the value
functions in the approximation space satisfy a certain shape.
This implies that natural algorithms are, in general, inherently
prone to converge to the wrong solution for most feature choices
even if the value function can be represented exactly. Given our
results, we show that state aggregation-based features are a
safe choice for natural algorithms and also provide a condition
for finding convergent algorithms under other feature
constructions.",
support = "ARC grant DP150104590",
for = "080101(60%),010404(40%)",
seo = "970108(100%)",
znote = "Acceptance rate: 850/4752=35\%",
}
@InProceedings{Hutter:19ksasao,
author = "Michael Cohen and Elliot Catt and Marcus Hutter",
title = "A Strongly Asymptotically Optimal Agent in General Environments",
booktitle = "Proc. 28th International Joint Conf. on Artificial Intelligence ({IJCAI'19})",
address = "Macao, China",
_editor = "Sarit Kraus",
_publisher = "IJCAI",
pages = "2179--2186",
_month = aug,
year = "2019",
bibtex = "http://www.hutter1.net/official/bib.htm#ksasao",
url = "http://arxiv.org/abs/1903.01021",
pdf = "http://www.hutter1.net/publ/ksasao.pdf",
slides = "http://www.hutter1.net/publ/sksasao.pdf",
poster = "http://www.hutter1.net/publ/pksasao.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
isbn = "978-0-9992411-4-1",
doi = "10.24963/ijcai.2019/302",
keywords = "reinforcement learning; model-based reasoning;
sequential decision making; probabilistic inference; AIXI",
abstract = "Reinforcement Learning agents are expected to eventually perform
well. Typically, this takes the form of a guarantee about the
asymptotic behavior of an algorithm given some assumptions about
the environment. We present an algorithm for a policy whose
value approaches the optimal value with probability 1 in all
computable probabilistic environments, provided the agent has a
bounded horizon. This is known as strong asymptotic optimality,
and it was previously unknown whether it was possible for a
policy to be strongly asymptotically optimal in the class of all
computable probabilistic environments. Our agent, Inquisitive
Reinforcement Learner (Inq), is more likely to explore the more
it expects an exploratory action to reduce its uncertainty about
which environment it is in, hence the term inquisitive.
Exploring inquisitively is a strategy that can be applied
generally; for more manageable environment classes,
inquisitiveness is tractable. We conducted experiments in
``grid-worlds'' to compare the Inquisitive Reinforcement Learner
to other weakly asymptotically optimal agents.",
support = "ARC grant DP150104590",
for = "080101(60%),010404(40%)",
seo = "970108(100%)",
znote = "Acceptance rate: 850/4752=35\%",
}
@TechReport{Hutter:19fair,
author = "Marcus Hutter",
title = "Fairness without Regret",
institution = "DeepMind \& ANU",
_month = jul,
year = "2019",
bibtex = "http://www.hutter1.net/official/bib.htm#fair",
url = "http://arxiv.org/abs/1907.05159",
pdf = "http://www.hutter1.net/publ/fair.pdf",
latex = "http://www.hutter1.net/publ/fair.tex",
slides = "http://www.hutter1.net/publ/sfair.pdf",
video = "https://hmi.anu.edu.au/events-2/2022/4/11/hmi-dais-18-fairness-without-regret",
project = "http://www.hutter1.net/official/projects.htm#mixed",
keywords = "utility; objective; optimal; fair/equitable/just; cost/regret; uncertainty.",
abstract = "A popular approach of achieving fairness in optimization problems
is by constraining the solution space to ``fair'' solutions,
which unfortunately typically reduces solution quality.
In practice, the ultimate goal is often an aggregate of sub-goals
without a unique or best way of combining them or which is
otherwise only partially known. I turn this problem into a feature
and suggest to use a parametrized objective and vary the parameters
within reasonable ranges to get a {\em set} of optimal solutions,
which can then be optimized using secondary criteria such as
fairness without compromising the primary objective,
i.e.\ without regret (societal cost).",
for = "220104(70%),010303(30%)",
seo = "940401(70%),970108(30%)",
}
@InProceedings{Hutter:19actagg,
author = "Sultan Javed Majeed and Marcus Hutter",
title = "Performance Guarantees for Homomorphisms beyond Markov Decision Processes",
booktitle = "Proc. 33rd {AAAI} Conference on Artificial Intelligence ({AAAI'19})",
address = "Honolulu, USA",
volume = "33",
pages = "7659--7666",
publisher = "AAAI Press",
_month = jan,
year = "2019",
bibtex = "http://www.hutter1.net/official/bib.htm#actagg",
url = "http://arxiv.org/abs/1811.03895",
pdf = "http://www.hutter1.net/publ/actagg.pdf",
poster = "http://www.hutter1.net/publ/sactagg.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
issn = "2159-5399",
isbn = "978-1-57735-809-1",
doi = "10.1609/aaai.v33i01.33017659",
keywords = "homomorphism; state aggregation; non-MDP; action-value aggregation;
reinforcement learning.",
abstract = "Most real-world problems have huge state and/or action spaces.
Therefore, a naive application of existing tabular solution
methods is not tractable on such problems. Nonetheless, these
solution methods are quite useful if an agent has access to a
relatively small state-action space homomorphism of the true
environment and near-optimal performance is guaranteed by the
map. A plethora of research is focused on the case when the
homomorphism is a Markovian representation of the underlying
process. However, we show that near-optimal performance is
sometimes guaranteed even if the homomorphism is non-Markovian.",
support = "ARC grant DP150104590",
for = "080101(50%),080198(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 1150/7095=16\%",
}
%-------------Publications-of-Marcus-Hutter-2018--------------%
@InProceedings{Hutter:18agisafe,
author = "Tom Everitt and Gary Lea and Marcus Hutter",
title = "{AGI} Safety Literature Review",
booktitle = "Proc. 27th International Joint Conf. on Artificial Intelligence ({IJCAI'18})",
address = "Stockholm, Sweden",
_editor = "Jérôme Lang",
_publisher = "IJCAI",
pages = "5441--5449",
_month = jul,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#safe",
url = "http://arxiv.org/abs/1805.01109",
pdf = "http://www.hutter1.net/publ/agisafe.pdf",
slides = "http://www.hutter1.net/publ/sagisafe.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
isbn = "978-0-9992411-2-7",
doi = "10.24963/ijcai.2018/768",
keywords = "reinforcement learning; philosophical and ethical issues;
artificial general intelligence; AGI safety; public policy;
survey; future AGI.",
abstract = "The development of Artificial General Intelligence (AGI) promises
to be a major event. Along with its many potential benefits, it
also raises serious safety concerns. The intention of this paper is
to provide an easily accessible and up-to-date collection of
references for the emerging field of AGI safety. A significant
number of safety problems for AGI have been identified. We list
these, and survey recent research on solving them. We also cover
works on how best to think of AGI from the limited knowledge we
have today, predictions for when AGI will first be created, and
what will happen after its creation. Finally, we review the current
public policy on AGI.",
note = "IJCAI Review Track",
support = "ARC grant DP150104590",
for = "080101(60%),220312(20%),080198(20%)",
seo = "970108(80%),970117(20%)",
znote = "Acceptance rate: 15/43=35\%",
}
@InProceedings{Hutter:18qnonmdp,
author =       "Sultan Javed Majeed and Marcus Hutter",
title = "On {Q}-learning Convergence for Non-{M}arkov Decision Processes",
booktitle = "Proc. 27th International Joint Conf. on Artificial Intelligence ({IJCAI'18})",
address = "Stockholm, Sweden",
_editor = "Jérôme Lang",
_publisher = "IJCAI",
pages = "2546--2552",
_month = jul,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#qnonmdp",
xurl = "http://arxiv.org/abs/1807.none",
pdf = "http://www.hutter1.net/publ/qnonmdp.pdf",
slides = "http://www.hutter1.net/publ/sqnonmdp.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
isbn = "978-0-9992411-2-7",
doi = "10.24963/ijcai.2018/353",
keywords = "reinforcement learning; TD-learning; Q-learning; non-MDP;
non-ergodic; convergence; abstractions; state-uniformity.",
abstract = "Temporal-difference (TD) learning is an attractive, computationally
efficient framework for model-free reinforcement learning.
Q-learning is one of the most widely used TD learning technique
that enables an agent to learn the optimal action-value function,
i.e. Q-value function. Contrary to its widespread use, Q-learning
has only been proven to converge on Markov Decision Processes
(MDPs) and Q-uniform abstractions of finite-state MDPs. On the
other hand, most real-world problems are inherently non-Markovian:
the full true state of the environment is not revealed by recent
observations. In this paper, we investigate the behavior of
Q-learning when applied to non-MDP and non-ergodic domains which
may have infinitely many underlying states. We prove that the
convergence guarantee of Q-learning can be extended to a class of
such non-MDP problems, in particular, to some non-stationary
domains. We show that state-uniformity of the optimal Q-value
function is a necessary and sufficient condition for Q-learning to
converge even in the case of infinitely many internal states.",
for = "080101(50%),080198(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 710/3470=21\%",
}
@Article{Hutter:18off2onx,
author = "Marcus Hutter",
title = "Tractability of Batch to Sequential Conversion",
journal = "Theoretical Computer Science",
volume = "733",
pages = "71--82",
publisher = "Elsevier",
_month = jul,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#off2onx",
url = "http://arxiv.org/abs/1407.3334",
pdf = "http://www.hutter1.net/publ/off2onx.pdf",
latex = "http://www.hutter1.net/publ/off2onx.tex",
slides = "http://www.hutter1.net/publ/soff2on.pdf",
project = "http://www.hutter1.net/official/projects.htm#infoth",
issn = "0304-3975",
doi = "10.1016/j.tcs.2018.04.037",
keywords = "offline; online; batch; sequential; probability; estimation;
prediction; time-consistency; normalization; tractable; regret;
combinatorics; Bayes; Laplace; Ristad; Good-Turing.",
abstract = "We consider the problem of converting batch estimators into a
sequential predictor or estimator with small extra regret. Formally
this is the problem of merging a collection of probability
measures over strings of length 1,2,3,... into a single
probability measure over infinite sequences. We describe various
approaches and their pros and cons on various examples. As a
side-result we give an elementary non-heuristic purely
combinatoric derivation of Turing's famous estimator. Our main
technical contribution is to determine the computational
complexity of sequential estimators with good guarantees in general.
We conclude with an open problem on how to derive tractable
sequential from batch estimators with good guarantees in general.",
for = "080401(30%),080201(30%),010405(40%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:18convbinctw,
author = "Badri N. Vellambi and Marcus Hutter",
title = "Convergence of Binarized Context-tree Weighting for Estimating Distributions of Stationary Sources",
booktitle = "Proc. {IEEE} International Symposium on Information Theory ({ISIT'18})",
address = "Vail, USA",
pages = "731--735",
_editor = "R. L. Urbanke and M. K. Varanasi",
publisher = "IEEE",
_month = jun,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#convbinctw",
pdf = "http://www.hutter1.net/publ/convbinctw.pdf",
slides = "http://www.hutter1.net/publ/sconvbinctw.pdf",
project = "http://www.hutter1.net/official/projects.htm#compress",
issn = "2157-8117",
isbn = "978-1-5386-4780-6",
doi = "10.1109/ISIT.2018.8437737",
keywords = "Context-tree weighting; KT estimator; frequency estimator; binarization;
stationary distribution; tree source; stationary ergodic source;
convergence rate; worst-case bounds.",
abstract = "This work investigates the convergence rate of learning the
stationary distribution of finite-alphabet stationary ergodic
sources using a binarized context-tree weighting approach. The
binarized context-tree weighting (CTW) algorithm estimates the
stationary distribution of a symbol as a product of conditional
distributions of each component bit, which are determined in a
sequential manner using the well known binary context-tree
weighting method. We establish that CTW algorithm is a consistent
estimator of the stationary distribution, and that the worst-case
$L_1$-prediction error between the CTW and frequency estimates
using $n$ source symbols each of which when binarized consists of
$k>1$ bits decays as $\Theta(\sqrt{2^k\log(n)/n})$.",
support = "ARC grants DP120100950 and DP150104590",
for = "080401(100%)",
seo = "970108(80%),890205(20%)",
}
@TechReport{Hutter:18align,
author = "Tom Everitt and Marcus Hutter",
title = "The Alignment Problem for History-Based {B}ayesian Reinforcement Learners",
pages = "70",
_month = jun,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#align",
http = "http://www.tomeveritt.se/papers/alignment.pdf",
pdf = "http://www.hutter1.net/publ/align.pdf",
slides = "http://www.hutter1.net/publ/salign.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
keywords = "AI safety, reinforcement learning, Bayesian learning, causal graphs",
abstract = "Value alignment is often considered a critical component of safe
artificial intelligence. Meanwhile, reinforcement learning is often
criticized as being inherently unsafe and misaligned, for reasons
such as wireheading, delusionboxes, misspecified reward functions
and distributional shifts. In this paper, we categorize sources of
misalignment for reinforcement learning agents, illustrating each
type with numerous examples. For each type of problem, we also
describe ways to remove the source of misalignment. Combined, the
suggestions form high-level blueprints for how to design value
aligned RL agents.",
support = "ARC grant DP150104590",
for = "080101(60%),220312(20%),080198(20%)",
seo = "970108(80%),970117(20%)",
note = "First winner of the AI alignment prize round 2:
http://www.lesswrong.com/posts/SSEyiHaACSYDHcYZz/announcement-ai-alignment-prize-round-2-winners-and-next",
}
@Article{Hutter:18aixicplexx,
author = "Jan Leike and Marcus Hutter",
title = "On the Computability of {S}olomonoff Induction and {AIXI}",
journal = "Theoretical Computer Science",
volume = "716",
pages = "28--49",
publisher = "Elsevier",
_month = mar,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#aixicplexx",
pdf = "http://www.hutter1.net/publ/aixicplexx.pdf",
slides = "http://www.hutter1.net/publ/saixicplex.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "0304-3975",
doi = "10.1016/j.tcs.2017.11.020",
keywords = "Solomonoff induction; AIXI; General reinforcement learning;
Knowledge-seeking agents; Computability; Arithmetical hierarchy.",
abstract = "How could we solve the machine learning and the artificial
intelligence problem if we had infinite computation? Solomonoff
induction and the reinforcement learning agent AIXI are proposed
answers to this question. Both are known to be incomputable. We
quantify this using the arithmetical hierarchy, and prove upper and
in most cases corresponding lower bounds for incomputability.
Moreover, we show that AIXI is not limit computable, thus it cannot
be approximated using finite computation. However there are limit
computable epsilon-optimal approximations to AIXI. We also derive
computability bounds for knowledge-seeking agents, and give a limit
computable weakly asymptotically optimal reinforcement learning
agent.",
support = "ARC grant DP150104590",
for = "080101(50%),080201(50%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:18piidkkt,
author = "Badri N. Vellambi and Owen Cameron and Marcus Hutter",
title = "Universal Compression of Piecewise i.i.d. Sources",
booktitle = "Proc. Data Compression Conference ({DCC'18})",
pages = "267--276",
_editor = "Ali Bilgin and Michael W. Marcellin and Joan Serra{-}Sagrist{\`{a}} and James A. Storer",
publisher = "IEEE Computer Society",
address = "Snowbird, Utah, USA",
_address = "Alamitos, CA (publisher)",
_month = mar,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#piidkkt",
pdf = "http://www.hutter1.net/publ/piidkkt.pdf",
slides = "http://www.hutter1.net/publ/spiidkkt.pdf",
project = "http://www.hutter1.net/official/projects.htm#compress",
doi = "10.1109/DCC.2018.00035",
issn = "2375-0359",
isbn = "978-1-5386-4884-1",
keywords = "switching data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
abstract = "We study the problem of compressing piecewise i.i.d. sources, which
models the practical application of jointly compressing multiple
disparate data files. We establish that universal compression of
piecewise i.i.d data is possible by modeling the data as a Markov
process whose memory grows logarithmically in the size of the data
using the Krichevsky-Trofimov (KT) estimator. The memory order is
chosen large enough so that the successful gleaning of the
distribution of the different pieces of the data from the
corresponding contexts is possible, and simultaneously small enough
that this learning can occur for almost any realization of any
piecewise data process.",
support = "ARC grants DP120100950 and DP150104590",
for = "080401(100%)",
seo = "970108(80%),890205(20%)",
}
@InCollection{Hutter:18uaitas,
author = "Tom Everitt and Marcus Hutter",
title = "Universal Artificial Intelligence: Practical Agents and Fundamental Challenges",
booktitle = "Foundations of Trusted Autonomy",
_series = "Studies in Systems, Decision and Control 117",
chapter = "2",
pages = "15--46",
editor = "Hussein A. Abbass and Jason Scholz and Darryn J. Reid",
publisher = "Springer",
_month = jan,
year = "2018",
bibtex = "http://www.hutter1.net/official/bib.htm#uaitas",
xurl = "http://arxiv.org/abs/1801.none",
pdf = "http://www.hutter1.net/publ/uaitas.pdf",
slides = "http://www.hutter1.net/publ/suaitas.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "2198-4182",
isbn = "978-3-319-64815-6",
doi = "10.1007/978-3-319-64816-3_2",
keywords = "foundations; general reinforcement learning; AI safety;
Solomonoff induction; intelligent agents.",
abstract = "Foundational theories have contributed greatly to scientific
progress in many fields. Examples include Zermelo-Fraenkel set
theory in mathematics, and universal Turing machines in computer
science. Universal Artificial Intelligence (UAI) is an increasingly
well-studied foundational theory for artificial intelligence, based
on ancient principles in the philosophy of science and modern
developments in information and probability theory. Importantly, it
refrains from making unrealistic Markov, ergodicity, or
stationarity assumptions on the environment. UAI provides a
theoretically optimal agent AIXI and principled ideas for
constructing practical autonomous agents. The theory also makes it
possible to establish formal results on the motivations of AI
systems. Such results may greatly enhance the trustability of
autonomous agents, and guide design choices towards more robust
agent architectures and incentive schemes. Finally, UAI offers a
deeper appreciation of fundamental problems such as the induction
problem and the exploration-exploitation dilemma.",
support = "ARC grant DP150104590",
for = "080101(80%),220312(20%)",
seo = "970108(80%),970117(20%)",
znote = "68500+ downloads in 2018. Top 10 most downloaded Springer books in 2018 across all Engineering:
http://www.springer.com/gp/campaigns/highlights-2018/engineering-2018",
}
%-------------Publications-of-Marcus-Hutter-2017--------------%
@InCollection{Hutter:17unilearn,
author = "Marcus Hutter",
title = "Universal Learning Theory",
booktitle = "Encyclopedia of Machine Learning and Data Mining",
pages = "1295--1304",
editor = "C. Sammut and G. Webb",
publisher = "Springer",
_month = aug,
year = "2017",
edition =      "Second",
bibtex = "http://www.hutter1.net/official/bib.htm#unilearn",
url = "http://arxiv.org/abs/1102.2467",
pdf = "http://www.hutter1.net/publ/unilearn.pdf",
latex = "http://www.hutter1.net/publ/unilearn.tex",
slides = "http://www.hutter1.net/ai/susp.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1007/978-1-4899-7687-1_867",
isbn = "978-1-4899-7686-4",
keywords = "Algorithmic probability; Ray Solomonoff; induction;
prediction; decision; action; Turing machine;
Kolmogorov complexity; universal prior; Bayes' rule.",
abstract = "This encyclopedic article gives a mini-introduction into the
theory of universal learning, founded by Ray Solomonoff in the
1960s and significantly developed and extended in the last
decade. It explains the spirit of universal learning, but
necessarily glosses over technical subtleties.",
for = "080401(30%),010405(30%),080198(40%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:17thompgrls,
author = "Jan Leike and Tor Lattimore and Laurent Orseau and Marcus Hutter",
title = "On {T}hompson Sampling and Asymptotic Optimality",
booktitle = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
address = "Melbourne, Australia",
_editor = "Carles Sierra",
_publisher = "IJCAI",
pages = "4889--4893",
_month = aug,
year = "2017",
bibtex = "http://www.hutter1.net/official/bib.htm#thompgrls",
url = "http://arxiv.org/abs/1602.07905",
pdf = "http://www.hutter1.net/publ/thompgrls.pdf",
slides = "http://www.hutter1.net/publ/sthompgrl.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
isbn = "978-0-9992411-0-3",
doi = "10.24963/ijcai.2017/688",
keywords = "General reinforcement learning; Thompson sampling;
asymptotic optimality; regret; discounting; recoverability; AIXI",
abstract = "We discuss some recent results on Thompson sampling
for nonparametric reinforcement learning in
countable classes of general stochastic environments.
These environments can be non-Markovian,
non-ergodic, and partially observable. We show
that Thompson sampling learns the environment
class in the sense that (1) asymptotically its value
converges in mean to the optimal value and (2)
given a recoverability assumption regret is sublinear.
We conclude with a discussion about optimality
in reinforcement learning.",
support = "ARC grant DP150104590",
note = "Best sister conferences paper track",
for = "080101(60%),010404(40%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:17corruptrl,
author = "Tom Everitt and Victoria Krakovna and Laurent Orseau and Marcus Hutter and Shane Legg",
title = "Reinforcement Learning with a Corrupted Reward Channel",
booktitle = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
address = "Melbourne, Australia",
_editor = "Carles Sierra",
_publisher = "IJCAI",
pages = "4705--4713",
_month = aug,
year = "2017",
bibtex = "http://www.hutter1.net/official/bib.htm#corruptrl",
url = "http://arxiv.org/abs/1705.08417",
pdf = "http://www.hutter1.net/publ/corruptrl.pdf",
slides = "http://www.hutter1.net/publ/scorruptrl.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
isbn = "978-0-9992411-0-3",
doi = "10.24963/ijcai.2017/656",
keywords = "decoupled reinforcement learning; reward corruption;
quantilisation; robustness; value learning.",
abstract = "No real-world reward function is perfect. Sensory errors and
software bugs may result in agents getting higher (or lower)
rewards than they should. For example, a reinforcement learning
agent may prefer states where a sensory error gives it the maximum
reward, but where the true reward is actually small. We formalise
this problem as a generalised Markov Decision Problem called
Corrupt Reward MDP. Traditional RL methods fare poorly in CRMDPs,
even under strong simplifying assumptions and when trying to
compensate for the possibly corrupt rewards. Two ways around the
problem are investigated. First, by giving the agent richer data,
such as in inverse reinforcement learning and semi-supervised
reinforcement learning, reward corruption stemming from systematic
sensory errors may sometimes be completely managed. Second, by
using randomisation to blunt the agent's optimisation, reward
corruption can be partially managed under some assumptions.",
support = "ARC grant DP150104590",
for = "080101(80%),220312(20%)",
seo = "970108(80%),970117(20%)",
znote = "Acceptance rate: 660/2540=26\%",
}
@InProceedings{Hutter:17cbefsrl,
author = "Jarryd Martin and Suraj Narayanan Sasikumar and Tom Everitt and Marcus Hutter",
title = "Count-Based Exploration in Feature Space for Reinforcement Learning",
booktitle = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
address = "Melbourne, Australia",
_editor = "Carles Sierra",
_publisher = "IJCAI",
pages = "2471--2478",
_month = aug,
year = "2017",
bibtex = "http://www.hutter1.net/official/bib.htm#cbefsrl",
url = "http://arxiv.org/abs/1706.08090",
pdf = "http://www.hutter1.net/publ/cbefsrl.pdf",
slides = "http://www.hutter1.net/publ/scbefsrl.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
isbn = "978-0-9992411-0-3",
doi = "10.24963/ijcai.2017/344",
keywords = "reinforcement learning; Markov decision process; planning under uncertainty;
sequential decision making; count-based exploration",
abstract = "We introduce a new count-based optimistic exploration algorithm for
reinforcement learning (RL) that is feasible in environments with
high-dimensional state-action spaces. The success of RL algorithms
in these domains depends crucially on generalisation from limited
training experience. Function approximation techniques enable RL
agents to generalise in order to estimate the value of unvisited
states, but at present few methods enable generalisation regarding
uncertainty. This has prevented the combination of scalable RL
algorithms with efficient exploration strategies that drive the
agent to reduce its uncertainty. We present a new method for
computing a generalised state visit-count, which allows the agent
to estimate the uncertainty associated with any state. Our
phi-pseudo-count achieves generalisation by exploiting the same
feature representation of the state space that is used for value
function approximation. States that have less frequently observed
features are deemed more uncertain. The phi-Exploration-Bonus
algorithm rewards the agent for exploring in feature space rather
than in the untransformed state space. The method is simpler and
less computationally expensive than some previous proposals, and
achieves near state-of-the-art results on high-dimensional RL
benchmarks.",
support = "ARC grant DP150104590",
for = "080199(50%),080101(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 660/2540=26\%
Also presented at SURL'17 http://www.surl.tirl.info/",
}
@InProceedings{Hutter:17urlsurexp,
author = "John Aslanides and Jan Leike and Marcus Hutter",
title = "Universal Reinforcement Learning Algorithms: Survey and Experiments",
booktitle = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
address = "Melbourne, Australia",
_editor = "Carles Sierra",
_publisher = "IJCAI",
pages = "1403--1410",
_month = aug,
year = "2017",
bibtex = "http://www.hutter1.net/official/bib.htm#urlsurexp",
url = "http://arxiv.org/abs/1705.10557",
pdf = "http://www.hutter1.net/publ/urlsurexp.pdf",
slides = "http://www.hutter1.net/publ/surlsurexp.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
demo = "http://www.hutter1.net/aixijs/",
code = "http://github.com/aslanides/aixijs",
isbn = "978-0-9992411-0-3",
doi = "10.24963/ijcai.2017/194",
keywords = "universal reinforcement learning; multi-agent system;
sequential decision making; survey; online demo; java code.",
abstract = "Many state-of-the-art reinforcement learning (RL) algorithms
typically assume that the environment is an ergodic Markov Decision
Process (MDP). In contrast, the field of universal reinforcement
learning (URL) is concerned with algorithms that make as few
assumptions as possible about the environment. The universal
Bayesian agent AIXI and a family of related URL algorithms have
been developed in this setting. While numerous theoretical
optimality results have been proven for these agents, there has
been no empirical investigation of their behavior to date. We
present a short and accessible survey of these URL algorithms under
a unified notation and framework, along with results of some
experiments that qualitatively illustrate some properties of the
resulting policies, and their relative performance on
partially-observable grid-world environments. We also present an
open-source reference implementation of the algorithms which we
hope will facilitate further understanding of, and experimentation
with, these ideas.",
support = "ARC grant DP150104590",
for = "080199(40%),080101(40%),010404(10%),010405(10%)",
seo = "970108(100%)",
znote = "Acceptance rate: 660/2540=26\%",
}
@InProceedings{Hutter:17offswitch,
author =       "Tobias W{\"a}ngberg and Mikael B{\"o}{\"o}rs and Elliot Catt and Tom Everitt and Marcus Hutter",
title = "A Game-Theoretic Analysis of The Off-Switch Game",
booktitle = "Proc. 10th Conf. on Artificial General Intelligence ({AGI'17})",
address = "Melbourne, Australia",
series = "LNAI",
volume = "10414",
pages = "167--177",
_editor = "Tom Everitt and Ben Goertzel and Alexey Potapov",
publisher = "Springer",
_month = aug,
year = "2017",
bibtex = "http://www.hutter1.net/official/bib.htm#offswitch",
url = "http://arxiv.org/abs/1708.03871",
pdf = "http://www.hutter1.net/publ/offswitch.pdf",
slides = "http://www.hutter1.net/publ/soffswitch.pdf",
project = "http://www.hutter1.net/official/projects.htm#safe",
doi = "10.1007/978-3-319-63703-7_16",
issn = "0302-9743",
isbn = "978-3-319-63702-0",
keywords = "AI safety; corrigibility; intelligent agents; game theory; uncertainty.",
abstract = "The off-switch game is a game theoretic model of a highly
intelligent robot interacting with a human. In the original paper
by Hadfield-Menell et al. (2016b), the analysis is not fully
game-theoretic as the human is modelled as an irrational player,
and the robot's best action is only calculated under unrealistic
normality and soft-max assumptions. In this paper, we make the
analysis fully game theoretic, by modelling the human as a rational
player with a random utility function. As a consequence, we are
able to easily calculate the robot's best action for arbitrary
belief and irrationality assumptions.",
for = "080101(80%),220312(20%)",
seo = "970108(80%),970117(20%)",
znote = "Also presented at PT-AI 2017.
http://www.pt-ai.org/2017/papers
Acceptance rate: 28/77 = 36\% (oral presentation) [51/77=66\% incl. posters].",
}
@InProceedings{Hutter:17expdisc,
author = "Sean Lamont and John Aslanides and Jan Leike and Marcus Hutter",
title = "Generalised Discount Functions applied to a {M}onte-{C}arlo {AI}$\mu$ Implementation",
booktitle = "Proc. 16th Conf. on Autonomous Agents and MultiAgent Systems ({AAMAS'17})",
pages = "1589--1591",
_editor = "Sanmay Das and Ed Durfee and Kate Larson and Michael Winikoff",
_publisher = "International Foundation for Autonomous Agents and Multiagent Systems",
address = "Sao Paulo, Brazil",
_month = may,
year = "2017",
bibtex = "http://www.hutter1.net/official/bib.htm#expdisc",
http = "http://dl.acm.org/citation.cfm?id=3091372",
url = "http://arxiv.org/abs/1703.01358",
pdf = "http://www.hutter1.net/publ/expdisc.pdf",
latex = "http://www.hutter1.net/publ/expdisc.tex",
slides = "http://www.hutter1.net/publ/sexpdisc.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
demo = "http://www.hutter1.net/aixijs/",
code = "http://github.com/aslanides/aixijs",
keywords = "Monte Carlo; discount function; reinforcement learning; time consistency",
abstract = "In recent years, work has been done to develop the theory of
General Reinforcement Learning (GRL). However, there are no
examples demonstrating the known results regarding generalised
discounting. We have added to the GRL simulation platform (AIXIjs)
the functionality to assign an agent arbitrary discount functions,
and an environment which can be used to determine the effect of
discounting on an agent's policy. Using this, we investigate how
geometric, hyperbolic and power discounting affect an informed
agent in a simple MDP. We experimentally reproduce a number of
theoretical results, and discuss some related subtleties. It was
found that the agent's behaviour followed what is expected
theoretically, assuming appropriate parameters were chosen for the
Monte-Carlo Tree Search (MCTS) planning algorithm.",
support = "ARC grant DP150104590",
for = "080199(40%),080101(40%),010404(10%),010405(10%)",
seo = "970108(100%)",
znote = "Acceptance rate: 276/567 = 49\%",
}
%-------------Publications-of-Marcus-Hutter-2016--------------%
@Article{Hutter:16exsaggx,
author = "Marcus Hutter",
title = "Extreme State Aggregation beyond {M}arkov Decision Processes",
journal = "Theoretical Computer Science",
volume = "650",
pages = "73--91",
publisher = "Elsevier",
_month = oct,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#exsaggx",
url = "http://arxiv.org/abs/1407.3341",
pdf = "http://www.hutter1.net/publ/exsaggx.pdf",
latex = "http://www.hutter1.net/publ/exsaggx.tex",
slides = "http://www.hutter1.net/publ/sexsagg.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
issn = "0304-3975",
doi = "10.1016/j.tcs.2016.07.032",
keywords = "State aggregation; Reinforcement learning; Non-MDP",
abstract = "We consider a Reinforcement Learning setup where an agent interacts
with an environment in observation-reward-action cycles without any
(esp. MDP) assumptions on the environment. State aggregation and
more generally feature reinforcement learning is concerned with
mapping histories/raw-states to reduced/aggregated states. The idea
behind both is that the resulting reduced process (approximately)
forms a small stationary finite-state MDP, which can then be
efficiently solved or learnt. We considerably generalize existing
aggregation results by showing that even if the reduced process is
not an MDP, the (q-)value functions and (optimal) policies of an
associated MDP with same state-space size solve the original
problem, as long as the solution can approximately be represented
as a function of the reduced states. This implies an upper bound on
the required state space size that holds uniformly for all RL
problems. It may also explain why RL algorithms designed for MDPs
sometimes perform well beyond MDPs.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:16aixideath,
author = "Jarryd Martin and Tom Everitt and Marcus Hutter",
title = "Death and Suicide in Universal Artificial Intelligence",
booktitle = "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
address = "New York, USA",
series = "LNAI",
volume = "9782",
pages = "23--32",
_editor = "Bas Steunebrink and Pei Wang and Ben Goertzel",
publisher = "Springer",
_month = jul,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#aixideath",
url = "http://arxiv.org/abs/1606.00652",
pdf = "http://www.hutter1.net/publ/aixideath.pdf",
latex = "http://www.hutter1.net/publ/aixideath.tex",
slides = "http://www.hutter1.net/publ/saixideath.pdf",
video = "http://youtu.be/c__OjDHqFs",
project = "http://www.hutter1.net/official/projects.htm#safe",
doi = "10.1007/978-3-319-41649-6_3",
issn = "0302-9743",
isbn = "978-3-319-41648-9",
keywords = "intelligent agents; death; suicide; aixi; reinforcement learning; semimeasure",
abstract = "Reinforcement learning (RL) is a general paradigm for studying
intelligent behaviour, with applications ranging from artificial
intelligence to psychology and economics. AIXI is a universal
solution to the RL problem; it can learn any computable environment.
A technical subtlety of AIXI is that it is defined using a mixture over
semimeasures that need not sum to 1, rather than
over proper probability measures. In this work we argue that
the shortfall of a semimeasure can naturally be interpreted as
the agent's estimate of the probability of its death. We formally define
death for generally intelligent agents like AIXI, and prove a number
of related theorems about their behaviour. Notable discoveries
include that agent behaviour can change radically under positive linear
transformations of the reward signal (from suicidal to
dogmatically self-preserving), and that the agent's posterior belief
that it will survive increases over time.",
support = "ARC grant DP150104590",
for = "080101(80%),220312(20%)",
seo = "970108(80%),970122(10%),970117(10%)",
znote = "Acceptance rate: 24/67 = 36\%",
}
@InProceedings{Hutter:16wirehead,
author = "Tom Everitt and Marcus Hutter",
title = "Avoiding Wireheading with Value Reinforcement Learning",
booktitle = "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
address = "New York, USA",
series = "LNAI",
volume = "9782",
pages = "12--22",
_editor = "Bas Steunebrink and Pei Wang and Ben Goertzel",
publisher = "Springer",
_month = jul,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#wirehead",
url = "http://arxiv.org/abs/1605.03143",
pdf = "http://www.hutter1.net/publ/wirehead.pdf",
latex = "http://www.hutter1.net/publ/wirehead.tex",
slides = "http://www.hutter1.net/publ/swirehead.pdf",
video = "http://youtu.be/sqFc2-_mDCk",
project = "http://www.hutter1.net/official/projects.htm#safe",
doi = "10.1007/978-3-319-41649-6_2",
issn = "0302-9743",
isbn = "978-3-319-41648-9",
keywords = "intelligent agents; reinforcement learning; wireheading; value RL; utility function; safety",
abstract = "How can we design good goals for arbitrarily intelligent agents? Reinforcement
learning (RL) is a natural approach. Unfortunately, RL does not work well for
generally intelligent agents, as RL agents are incentivised to shortcut the
reward sensor for maximum reward -- the so-called wireheading problem. In this
paper we suggest an alternative to RL called value reinforcement learning (VRL).
In VRL, agents use the reward signal to learn a utility function. The VRL setup
allows us to remove the incentive to wirehead by placing a constraint on the
agent's actions. The constraint is defined in terms of the agent's belief
distributions, and does not require an explicit specification of which actions
constitute wireheading.",
support = "ARC grant DP150104590",
for = "080101(70%),220312(30%)",
seo = "970108(60%),970122(20%),970117(20%)",
znote = "Acceptance rate: 24/67 = 36\%",
}
@InProceedings{Hutter:16selfmod,
author = "Tom Everitt and Daniel Filan and Mayank Daswani and Marcus Hutter",
title = "Self-Modification of Policy and Utility Function in Rational Agents",
booktitle = "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
address = "New York, USA",
series = "LNAI",
volume = "9782",
pages = "1--11",
_editor = "Bas Steunebrink and Pei Wang and Ben Goertzel",
publisher = "Springer",
_month = jul,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#selfmod",
url = "http://arxiv.org/abs/1605.03142",
pdf = "http://www.hutter1.net/publ/selfmod.pdf",
latex = "http://www.hutter1.net/publ/selfmod.tex",
video = "http://youtu.be/sqFc2-_mDCk",
award = "http://agi-conf.org/2016/prizes/",
project = "http://www.hutter1.net/official/projects.htm#safe",
doi = "10.1007/978-3-319-41649-6_1",
issn = "0302-9743",
isbn = "978-3-319-41648-9",
keywords = "intelligent agents; self-modification; goal preservation; utility functions; control problem; safety",
abstract = "Any agent that is part of the environment it interacts with and has versatile
actuators (such as arms and fingers), will in principle have the ability to
self-modify -- for example by changing its own source code. As we continue to
create more and more intelligent agents, chances increase that they will learn
about this ability. The question is: will they want to use it? For example,
highly intelligent systems may find ways to change their goals to something more
easily achievable, thereby `escaping' the control of their designers. In an
important paper, Omohundro (2008) argued that goal preservation is a fundamental
drive of any intelligent system, since a goal is more likely to be achieved if
future versions of the agent strive towards the same goal. In this paper, we
formalise this argument in general reinforcement learning, and explore
situations where it fails. Our conclusion is that the self-modification
possibility is harmless if and only if the value function of the agent
anticipates the consequences of self-modifications and use the current utility
function when evaluating the future.",
support = "ARC grant DP150104590",
for = "080101(70%),220312(30%)",
seo = "970108(60%),970122(20%),970117(20%)",
znote = "Acceptance rate: 24/67 = 36\%",
note = "Winner of the Kurzweil Prize for Best AGI Paper",
}
@InProceedings{Hutter:16vacrecog,
author = "Basura Fernando and Peter Anderson and Marcus Hutter and Stephen Gould",
title = "Discriminative Hierarchical Rank Pooling for Activity Recognition",
booktitle = "Proc. IEEE Conference on Computer Vision and Pattern Recognition ({CVPR'16})",
address = "Las Vegas, NV, USA",
pages = "1924--1932",
_editor = "Lourdes Agapito and Tamara Berg and Jana Kosecka and Lihi Zelnik-Manor",
publisher = "IEEE",
_month = jun,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#vacrecog",
pdf = "http://www.hutter1.net/publ/vacrecog.pdf",
project = "http://www.hutter1.net/official/projects.htm#cvip",
code = "http://www.hutter1.net/publ/varcode.zip",
issn = "1063-6919",
doi = "10.1109/CVPR.2016.212",
keywords = "rank pooling; activity classification; hierarchy; video; training;
convolutional neural network; nonlinear feature functions",
abstract = "We present hierarchical rank pooling, a video sequence encoding
method for activity recognition. It consists of a network of rank
pooling functions which captures the dynamics of rich convolutional
neural network features within a video sequence. By stacking
non-linear feature functions and rank pooling over one another, we
obtain a high capacity dynamic encoding mechanism, which is used
for action recognition. We present a method for jointly learning
the video representation and activity classifier parameters.
Our method obtains state-of-the art results on three important
activity recognition benchmarks: 76.7\% on Hollywood2,
66.9\% on HMDB51 and, 91.4\% on UCF101.",
for = "080104(50%),080106(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 643/1865 = 30\%",
}
@InProceedings{Hutter:16thompgrl,
author = "Jan Leike and Tor Lattimore and Laurent Orseau and Marcus Hutter",
title = "Thompson Sampling is Asymptotically Optimal in General Environments",
booktitle = "Proc. 32nd International Conf. on Uncertainty in Artificial Intelligence ({UAI'16})",
address = "New Jersey, USA",
_editor = "Alexander Ihler and Dominik Janzing",
publisher = "AUAI Press",
pages = "417--426",
_month = jun,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#thompgrl",
http = "http://auai.org/uai2016/proceedings/papers/20.pdf",
url = "http://arxiv.org/abs/1602.07905",
pdf = "http://www.hutter1.net/publ/thompgrl.pdf",
latex = "http://www.hutter1.net/publ/thompgrl.tex",
slides = "http://www.hutter1.net/publ/sthompgrl.pdf",
award = "http://auai.org/uai2016/program.php",
project = "http://www.hutter1.net/official/projects.htm#uai",
isbn = "978-0-9966431-1-5",
keywords = "General reinforcement learning; Thompson sampling;
asymptotic optimality; regret; discounting; recoverability; AIXI",
abstract = "We discuss a variant of Thompson sampling for nonparametric
reinforcement learning in countable classes of general stochastic
environments. These environments can be non-Markov, nonergodic, and
partially observable. We show that Thompson sampling learns the
environment class in the sense that (1) asymptotically its value
converges to the optimal value in mean and (2) given a
recoverability assumption regret is sublinear.",
support = "ARC grant DP150104590",
for = "080101(60%),010404(40%)",
seo = "970108(100%)",
note = "Best student paper",
znote = "Acceptance rate: 26/275 = 9\% (oral!) [85/275 = 31\% incl. poster]",
}
@InProceedings{Hutter:16speedprior,
author = "Daniel Filan and Jan Leike and Marcus Hutter",
title = "Loss Bounds and Time Complexity for Speed Priors",
booktitle = "Proc. 19th International Conf. on Artificial Intelligence and Statistics ({AISTATS'16})",
address = "Cadiz, Spain",
volume = "51",
_editor = "Arthur Gretton and Christian Robert",
publisher = "Microtome",
pages = "1394--1402",
_month = may,
year = "2016",
bibtex = "http://www.hutter1.net/official/bib.htm#speedprior",
http = "http://jmlr.org/proceedings/papers/v51/",
url = "http://arxiv.org/abs/1604.03343",
pdf = "http://www.hutter1.net/publ/speedprior.pdf",
latex = "http://www.hutter1.net/publ/speedprior.tex",
project = "http://www.hutter1.net/official/projects.htm#ait",
issn = "0302-9743",
keywords = "universal distribution; speed prior; computational complexity; predictive performance; upper bounds.",
abstract = "This paper establishes for the first time the predictive
performance of speed priors and their computational complexity. A
speed prior is essentially a probability distribution that puts low
probability on strings that are not efficiently computable. We
propose a variant to the original speed prior (Schmidhuber, 2002),
and show that our prior can predict sequences drawn from
probability measures that are estimable in polynomial time. Our
speed prior is computable in doubly-exponential time, but not in
polynomial time. On a polynomial time computable sequence our speed
prior is computable in exponential time. We show better upper
complexity bounds for Schmidhuber's speed prior under the same
conditions, and that it predicts deterministic sequences that are
computable in polynomial time; however, we also show that it is not
computable in polynomial time, and the question of its predictive
properties for stochastic sequences remains open.",
support = "ARC grant DP150104590",
for = "080101(20%),080201(30%),080401(20%),010404(30%)",
seo = "970108(100%)",
znote = "Acceptance rate: 165/537 = 31\%",
}
%-------------Publications-of-Marcus-Hutter-2015--------------%
@InProceedings{Hutter:15metasearch1,
author = "Tom Everitt and Marcus Hutter",
title = "Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art I: {T}ree Search",
booktitle = "Proc. 28th Australasian Joint Conference on Artificial Intelligence ({AusAI'15})",
address = "Canberra, Australia",
series = "LNAI",
volume = "9457",
_editor = "Bernhard Pfahringer and Jochen Renz",
publisher = "Springer",
pages = "157--165",
_month = dec,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#metasearch1",
url = "http://arxiv.org/abs/1509.02709",
pdf = "http://www.hutter1.net/publ/metasearch1.pdf",
slides = "http://www.hutter1.net/publ/smetasearch.pdf",
project = "http://www.hutter1.net/official/projects.htm#search",
code = "http://www.hutter1.net/publ/metasearchcode.zip",
issn = "0302-9743",
isbn = "978-3-319-26349-6",
doi = "10.1007/978-3-319-26350-2_14",
keywords = "BFS, DFS, Analytical Algorithm Selection, Average runtime, Meta-heuristics,
Tree Search, Probabilistic Goal Distribution",
abstract = "BFS and DFS are the two most fundamental search algorithms.
We derive approximations of their expected runtimes in complete
trees, as a function of tree depth and probabilistic goal distribution. We
also demonstrate that the analytical approximations are close to the
empirical averages for most parameter settings, and that the results can
be used to predict the best algorithm given the relevant problem features.",
for = "080199(50%),080201(50%)",
seo = "970108(100%)",
znote = "Acceptance rate (all papers): 57/102 = 56\%",
}
@InProceedings{Hutter:15metasearch2,
author = "Tom Everitt and Marcus Hutter",
title = "Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art II: {G}raph Search",
booktitle = "Proc. 28th Australasian Joint Conference on Artificial Intelligence ({AusAI'15})",
address = "Canberra, Australia",
series = "LNAI",
volume = "9457",
_editor = "Bernhard Pfahringer and Jochen Renz",
publisher = "Springer",
pages = "166--178",
_month = dec,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#metasearch2",
url = "http://arxiv.org/abs/1509.02709",
pdf = "http://www.hutter1.net/publ/metasearch2.pdf",
slides = "http://www.hutter1.net/publ/smetasearch.pdf",
project = "http://www.hutter1.net/official/projects.htm#search",
code = "http://www.hutter1.net/publ/metasearchcode.zip",
issn = "0302-9743",
isbn = "978-3-319-26349-6",
doi = "10.1007/978-3-319-26350-2_15",
keywords = "BFS, DFS, Analytical Algorithm Selection, Average runtime, Meta-heuristics,
Graph Search, Probabilistic Goal Distribution",
abstract = "The algorithm selection problem asks to select the best algorithm for
a given problem. In the companion paper (Everitt and Hutter, AusAI, 2015),
expected BFS and DFS tree search runtime was approximated
as a function of tree depth and probabilistic goal distribution. Here we
provide an analogous analysis of BFS and DFS graph search, deriving
expected runtime as a function of graph structure and goal distribution.
The applicability of the method is demonstrated through analysis of two
different grammar problems. The approximations come surprisingly close
to empirical reality.",
for = "080199(50%),080201(50%)",
seo = "970108(100%)",
znote = "Acceptance rate (full papers): 39/102 = 38\%",
}
@InProceedings{Hutter:15sikscplex,
author = "Jan Leike and Marcus Hutter",
title = "On the Computability of {S}olomonoff Induction and Knowledge-Seeking",
booktitle = "Proc. 26th International Conf. on Algorithmic Learning Theory ({ALT'15})",
address = "Banff, Canada",
series = "LNAI",
volume = "9355",
_editor = "Kamalika Chaudhuri and Claudio Gentile and Sandra Zilles",
publisher = "Springer",
pages = "364--378",
_month = oct,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#sikscplex",
url = "http://arxiv.org/abs/1507.04124",
pdf = "http://www.hutter1.net/publ/sikscplex.pdf",
slides = "http://www.hutter1.net/publ/ssikscplex.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "0302-9743",
isbn = "978-3-319-24485-3",
doi = "10.1007/978-3-319-24486-0_24",
keywords = "Solomonoff induction; Exploration; Knowledge-seeking agents;
General reinforcement learning; Asymptotic optimality; Computability;
Complexity; Arithmetical hierarchy; Universal turing machine; AIXI; BayesExp",
abstract = "Solomonoff induction is held as a gold standard for learning,
but it is known to be incomputable. We quantify its incomputability
by placing various flavors of Solomonoff's prior M in the arithmetical
hierarchy. We also derive computability bounds for knowledge-seeking
agents, and give a limit-computable weakly asymptotically optimal reinforcement
learning agent.",
support = "ARC grant DP150104590",
for = "080101(50%),080201(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 23/46 = 50\%",
}
@InProceedings{Hutter:15solraven,
author = "Jan Leike and Marcus Hutter",
title = "Solomonoff Induction Violates {N}icod's Criterion",
booktitle = "Proc. 26th International Conf. on Algorithmic Learning Theory ({ALT'15})",
address = "Banff, Canada",
series = "LNAI",
volume = "9355",
_editor = "Kamalika Chaudhuri and Claudio Gentile and Sandra Zilles",
publisher = "Springer",
pages = "349--363",
_month = oct,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#solraven",
url = "http://arxiv.org/abs/1507.04121",
pdf = "http://www.hutter1.net/publ/solraven.pdf",
slides = "http://www.hutter1.net/publ/ssolraven.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
code = "http://www.hutter1.net/publ/solraven.cpp",
issn = "0302-9743",
isbn = "978-3-319-24485-3",
doi = "10.1007/978-3-319-24486-0_23",
keywords = "Bayesian reasoning; Confirmation; Disconfirmation; Hempel's paradox;
Equivalence condition; Solomonoff normalization",
abstract = "Nicod's criterion states that observing a black raven is evidence
for the hypothesis H that all ravens are black. We show that
Solomonoff induction does not satisfy Nicod's criterion: there are
time steps in which observing black ravens decreases the belief in
H. Moreover, while observing any computable infinite string
compatible with H, the belief in H decreases infinitely often when
using the unnormalized Solomonoff prior, but only finitely often
when using the normalized Solomonoff prior. We argue that the fault
is not with Solomonoff induction; instead we should reject Nicod's
criterion.",
note = "Also presented at CCR: http://math.uni-heidelberg.de/logic/conferences/ccr2015/",
support = "ARC grant DP150104590",
for = "080199(50%),220399(50%)",
seo = "970108(50%),970122(50%)",
znote = "Acceptance rate: 23/46 = 50\%",
}
@InProceedings{Hutter:15seqdts,
author = "Tom Everitt and Jan Leike and Marcus Hutter",
title = "Sequential Extensions of Causal and Evidential Decision Theory",
booktitle = "Proc. 4th International Conf. on Algorithmic Decision Theory ({ADT'15})",
address = "Lexington, USA",
series = "LNAI",
volume = "9346",
_editor = "Toby Walsh",
publisher = "Springer",
pages = "205--221",
_month = sep,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#seqdts",
url = "http://arxiv.org/abs/1506.07359",
pdf = "http://www.hutter1.net/publ/seqdts.pdf",
slides = "http://www.hutter1.net/publ/sseqdts.pdf",
project = "http://www.hutter1.net/official/projects.htm#universal",
issn = "0302-9743",
isbn = "978-3-319-23113-6",
doi = "10.1007/978-3-319-23114-3_13",
keywords = "Evidential decision theory; Causal decision theory;
Planning; Causal graphical models; Dualism; Physicalism",
abstract = "Moving beyond the dualistic view in AI where agent and environment
are separated incurs new challenges for decision making, as
calculation of expected utility is no longer straightforward. The
non-dualistic decision theory literature is split between causal
decision theory and evidential decision theory. We extend these
decision algorithms to the sequential setting where the agent
alternates between taking actions and observing their consequences.
We find that evidential decision theory has two natural extensions
while causal decision theory only has one.",
support = "ARC grant DP150104590",
for = "080101(50%),220302(50%)",
seo = "970108(50%),970122(50%)",
znote = "Acceptance rate: 32/70 = 45\%",
}
@Article{Hutter:15ratagentx,
author = "Peter Sunehag and Marcus Hutter",
title = "Rationality, Optimism and Guarantees in General Reinforcement Learning",
journal = "Journal of Machine Learning Research",
volume = "16",
pages = "1345--1390",
publisher = "Microtome",
_address = "Princeton, NJ, USA",
_month = aug,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#ratagentx",
url = "http://jmlr.org/papers/v16/sunehag15a.html",
pdf = "http://www.hutter1.net/publ/ratagentx.pdf",
slides = "http://www.hutter1.net/publ/sagproblaws.pdf",
slides = "http://www.hutter1.net/publ/soptcog.pdf",
slides = "http://www.hutter1.net/publ/sagscilaws.pdf",
slides = "http://www.hutter1.net/publ/saixiopt.pdf",
slides = "http://www.hutter1.net/publ/soptopt.pdf",
slides = "http://www.hutter1.net/publ/saixiaxiom.pdf",
slides = "http://www.hutter1.net/publ/saixiaxiom2.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "1532-4435",
keywords = "Reinforcement Learning, Rationality, Optimism, Optimality, Error bounds",
abstract = "In this article, we present a top-down theoretical study of general
reinforcement learning agents. We begin with rational agents with
unlimited resources and then move to a setting where an agent can
only maintain a limited number of hypotheses and optimizes plans
over a horizon much shorter than what the agent designer actually
wants. We axiomatize what is rational in such a setting in a manner
that enables optimism, which is important to achieve systematic
explorative behavior. Then, within the class of agents deemed
rational, we achieve convergence and finite-error bounds. Such
results are desirable since they imply that the agent learns well
from its experiences, but the bounds do not directly guarantee good
performance and can be achieved by agents doing things one should
obviously not. Good performance cannot in fact be guaranteed for
any agent in fully general settings. Our approach is to design
agents that learn well from experience and act rationally. We
introduce a framework for general reinforcement learning agents
based on rationality axioms for a decision function and an
hypothesis-generating function designed so as to achieve guarantees
on the number errors. We will consistently use an optimistic
decision function but the hypothesis-generating function needs to
change depending on what is known/assumed. We investigate a number
of natural situations having either a frequentist or Bayesian flavor,
deterministic or stochastic environments and either finite or
countable hypothesis class. Further, to achieve sufficiently good
bounds as to hold promise for practical success we introduce a
notion of a class of environments being generated by a set of laws.
None of the above has previously been done for fully general
reinforcement learning environments.",
support = "ARC grant DP120100950",
for = "080101(60%),010404(30%),220302(10%)",
seo = "970108(90%),970122(10%)",
}
@Article{Hutter:15mnonconvx,
author = "Tor Lattimore and Marcus Hutter",
title = "On {M}artin-L{\"o}f (Non)Convergence of {S}olomonoff's Universal Mixture",
journal = "Theoretical Computer Science",
volume = "588",
pages = "2--15",
publisher = "Elsevier",
_month = jul,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#mnonconvx",
pdf = "http://www.hutter1.net/publ/mnonconvx.pdf",
slides = "http://www.hutter1.net/publ/smnonconv.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
issn = "0304-3975",
doi = "10.1016/j.tcs.2014.12.004",
keywords = "Solomonoff induction, Kolmogorov complexity, theory of computation.",
abstract = "We study the convergence of Solomonoff's universal mixture on
individual Martin-L{\"o}f random sequences. A new result is presented
extending the work of Hutter and Muchnik [3] by showing that there
does not exist a universal mixture that converges on all Martin-L{\"o}f
random sequences. We show that this is not an artifact of the fact
that the universal mixture is not a proper measure and that the
normalised universal mixture also fails to converge on all
Martin-L{\"o}f random sequences.",
for = "080401(50%),010404(30%),010405(20%)",
seo = "970101(30%),970108(70%)",
}
@InProceedings{Hutter:15learncnf,
author = "Joel Veness and Marcus Hutter and Laurent Orseau and Marc Bellemare",
title = "Online Learning of {k-CNF} Boolean Functions",
booktitle = "Proc. 24th International Joint Conf. on Artificial Intelligence ({IJCAI'15})",
address = "Buenos Aires, Argentina",
_editor = "Qiang Yang and Michael Wooldridge",
publisher = "AAAI Press",
pages = "3865--3873",
_month = jul,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#learncnf",
url = "http://arxiv.org/abs/1403.6863",
pdf = "http://www.hutter1.net/publ/learncnf.pdf",
slides = "http://www.hutter1.net/publ/slearncnf.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
isbn = "978-1-57735-738-4",
keywords = "k-CNF, Online Learning, Logarithmic Loss, Bayesian algorithm",
abstract = "This paper revisits the problem of learning a k-CNF Boolean function
from examples, for fixed k, in the context of online learning under
the logarithmic loss. We give a Bayesian interpretation to one of
Valiant's classic PAC learning algorithms, which we then build upon
to derive three efficient, online, probabilistic, supervised
learning algorithms for predicting the output of an unknown k-CNF
Boolean function. We analyze the loss of our methods, and show that
the cumulative log-loss can be upper bounded by a polynomial
function of the size of each example.",
support = "ARC grant DP150104590",
for = "080101(30%),010404(30%),080201(40%)",
seo = "970108(100%)",
znote = "Acceptance rate: 572/1996 = 29\%",
}
@InProceedings{Hutter:15agproblaws,
author = "Peter Sunehag and Marcus Hutter",
title = "Using Localization and Factorization to Reduce the Complexity of Reinforcement Learning",
booktitle = "Proc. 8th Conf. on Artificial General Intelligence ({AGI'15})",
address = "Berlin, Germany",
series = "LNAI",
volume = "9205",
pages = "177--186",
_editor = "Jordi Bieger and Ben Goertzel and Alexey Potapov",
publisher = "Springer",
_month = jul,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#agproblaws",
pdf = "http://www.hutter1.net/publ/agproblaws.pdf",
slides = "http://www.hutter1.net/publ/sagproblaws.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-319-21365-1_19",
issn = "0302-9743",
isbn = "978-3-319-21364-4",
keywords = "reinforcement learning; laws; optimism; bounds",
abstract = "General reinforcement learning is a powerful framework for
artificial intelligence that has seen much theoretical progress since introduced
fifteen years ago. We have previously provided guarantees for
cases with finitely many possible environments. Though the results are
the best possible in general, a linear dependence on the size of the hypothesis
class renders them impractical. However, we dramatically improved
on these by introducing the concept of environments generated
by combining laws. The bounds are then linear in the number of laws
needed to generate the environment class. This number is identified as a
natural complexity measure for classes of environments. The individual
law might only predict some feature (factorization) and only in some
contexts (localization). We here extend previous deterministic results to
the important stochastic setting.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(80%),970122(20%)",
znote = "Acceptance rate: 41/72 = 57\%.",
}
@InProceedings{Hutter:15aixicplex,
author = "Jan Leike and Marcus Hutter",
title = "On the Computability of AIXI",
booktitle = "Proc. 31st International Conf. on Uncertainty in Artificial Intelligence ({UAI'15})",
address = "Amsterdam, Netherlands",
_editor = "Marina Meila and Tom Heskes",
publisher = "AUAI Press",
pages = "464--473",
_month = jul,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#aixicplex",
url = "http://arxiv.org/abs/1510.05572",
pdf = "http://www.hutter1.net/publ/aixicplex.pdf",
slides = "http://www.hutter1.net/publ/saixicplex.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
isbn = "978-0-9966431-0-8",
keywords = "AIXI; Solomonoff induction; general reinforcement learning;
computability; complexity; arithmetical hierarchy; universal Turing machine.",
abstract = "How could we solve the machine learning and the artificial
intelligence problem if we had infinite computation? Solomonoff
induction and the reinforcement learning agent AIXI are proposed
answers to this question. Both are known to be incomputable. In
this paper, we quantify this using the arithmetical hierarchy, and
prove upper and corresponding lower bounds for incomputability. We
show that AIXI is not limit computable, thus it cannot be
approximated using finite computation. Our main result is a
limit-computable epsilon-optimal version of AIXI with infinite horizon
that maximizes expected rewards.",
support = "ARC grant DP150104590",
for = "080101(50%),080201(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 99/291=34\%",
}
@Article{Hutter:15aixiprior,
author = "Jan Leike and Marcus Hutter",
title = "Bad Universal Priors and Notions of Optimality",
journal = "Journal of Machine Learning Research, W\&CP: COLT",
volume = "40",
pages = "1244--1259",
_editor = "Peter Gr{\"u}nwald and Elad Hazan",
publisher = "",
_address = "Princeton, NJ, USA",
_month = jul,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#aixiprior",
http = "http://jmlr.org/proceedings/papers/v40/Leike15.html",
url = "http://arxiv.org/abs/1510.04931",
pdf = "http://www.hutter1.net/publ/aixiprior.pdf",
slides = "http://www.hutter1.net/publ/saixiprior.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "1532-4435",
keywords = "AIXI, general reinforcement learning, universal Turing machine,
Legg-Hutter intelligence, balanced Pareto optimality, asymptotic optimality.",
abstract = "A big open question of algorithmic information theory is the choice
of the universal Turing machine (UTM). For Kolmogorov complexity
and Solomonoff induction we have invariance theorems: the choice of
the UTM changes bounds only by a constant. For the universally
intelligent agent AIXI (Hutter, 2005) no invariance theorem is
known. Our results are entirely negative: we discuss cases in which
unlucky or adversarial choices of the UTM cause AIXI to misbehave
drastically. We show that Legg-Hutter intelligence and thus
balanced Pareto optimality is entirely subjective, and that every
policy is Pareto optimal in the class of all computable
environments. This undermines all existing optimality properties
for AIXI. While it may still serve as a gold standard for AI, our
results imply that AIXI is a relative theory, dependent on the
choice of the UTM. ",
note = "Also presented at EWRL'15.
http://ewrl.files.wordpress.com/2015/02/ewrl12\_2015\_submission\_3.pdf",
support = "ARC grant DP150104590",
for = "080101(80%),080401(20%)",
seo = "970108(100%)",
znote = "28th Annual Conf. on Learning Theory. Acceptance rate: 27/176 = 15\%",
}
@InCollection{Hutter:15aitcog,
author = "Peter Sunehag and Marcus Hutter",
title = "Algorithmic Complexity",
booktitle = "International Encyclopedia of the Social \& Behavioral Sciences",
volume = "1",
pages = "534--538",
editor = "James D. Wright",
publisher = "Elsevier",
_month = apr,
year = "2015",
edition = "2nd",
bibtex = "http://www.hutter1.net/official/bib.htm#aitcog",
pdf = "http://www.hutter1.net/publ/aitcog.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
isbn = "978-0-080-97086-8",
doi = "10.1016/B978-0-08-097086-8.43001-1",
keywords = "Kolmogorov Complexity, Algorithmic Information Theory, Cognition,
Rationality, Simplicity, Optimism, Induction, Similarity, Clustering,
Prediction, Agents, Learning, Reinforcement",
abstract = "Algorithmic complexity provides a mathematical formal notion of
string complexity. Building on this, one arrives at mathematical
`gold standard' (though incomputable) definitions of randomness,
induction, similarity, and even intelligence. These definitions can
be turned into practical algorithms by using common compressors to
approximate the universal solutions. One can consider the theories
as idealized cognition with respect to which one can aim to
describe actual biological cognition by listing biases and
limitations that need to be defined relative to some normative
reference.",
support = "ARC grant DP120100950",
for = "170203(50%),080401(50%)",
seo = "970117(100%)",
}
@InProceedings{Hutter:15cnc,
author = "Joel Veness and Marc Bellemare and Marcus Hutter and Alvin Chua and Guillaume Desjardins",
title = "Compress and Control",
booktitle = "Proc. 29th {AAAI} Conference on Artificial Intelligence ({AAAI'15})",
address = "Austin, USA",
pages = "3016--3023",
_editor = "Blai Bonet and Sven Koenig",
publisher = "AAAI Press",
_month = jan,
year = "2015",
bibtex = "http://www.hutter1.net/official/bib.htm#cnc",
url = "http://arxiv.org/abs/1411.5326",
pdf = "http://www.hutter1.net/publ/cnc.pdf",
slides = "http://www.hutter1.net/publ/scnc.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "2159-5399",
isbn = "978-1-57735-698-1",
keywords = "reinforcement learning, compression, Q-value function, policy evaluation,
density estimation, on-policy control, Pong, Freeway, Q*Bert",
abstract = "This paper describes a new information-theoretic policy evaluation
technique for reinforcement learning. This technique converts any
compression or density model into a corresponding estimate of
value. Under appropriate stationarity and ergodicity conditions, we
show that the use of a sufficiently powerful model gives rise to a
consistent value function estimator. We also study the behavior of
this technique when applied to various Atari 2600 video games,
where the use of suboptimal modeling techniques is unavoidable. We
consider three fundamentally different models, all too limited to
perfectly model the dynamics of the system. Remarkably, we find
that our technique provides sufficiently accurate value estimates
for effective on-policy control. We conclude with a suggestive
study highlighting the potential of our technique to scale to large
problems.",
for = "080101(100%)",
seo = "970108(100%)",
znote = "Acceptance rate: 531/1991 = 27\%. Oral 200?/1991=10\%",
}
%-------------Publications-of-Marcus-Hutter-2014--------------%
@TechReport{Hutter:14cbayeskl,
author = "Tor Lattimore and Marcus Hutter",
title = "Asymptotics of Continuous Bayes for Non-i.i.d. Sources",
pages = "1--16",
_month = nov,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#cbayeskl",
url = "http://arxiv.org/abs/1411.2918",
pdf = "http://www.hutter1.net/publ/cbayeskl.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
keywords = "entropy; stochastic process; Bayes; non-stationary; dependence; sequence prediction; compression",
abstract = "Clarke and Barron analysed the relative entropy between an
i.i.d. source and a Bayesian mixture over a continuous class
containing that source. In this paper a comparable result is
obtained when the source is permitted to be both non-stationary
and dependent. The main theorem shows that Bayesian methods
perform well for both compression and sequence prediction even
in this most general setting with only mild technical
assumptions.",
}
@InProceedings{Hutter:14rladvice,
author = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
title = "Reinforcement Learning with Value Advice",
booktitle = "Proc. 6th Asian Conf. on Machine Learning ({ACML'14})",
volume = "39",
pages = "299--314",
_editor = "Dinh Phung and Hang Li",
publisher = "JMLR",
address = "Canberra, Australia",
_month = nov,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#rladvice",
pdf = "http://www.hutter1.net/publ/rladvice.pdf",
slides = "http://www.hutter1.net/publ/srladvice.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
issn = "1532-4435",
http = "http://jmlr.org/proceedings/papers/v39/daswani14.pdf",
keywords = "feature reinforcement learning; imitation learning; dataset aggregation;
value advice; upper confidence tree; Monte Carlo search; Arcade learning environment.",
abstract = "The problem we consider in this paper is reinforcement learning
with value advice. In this setting, the agent is given limited
access to an oracle that can tell it the expected return (value) of
any state-action pair with respect to the optimal policy. The agent
must use this value to learn an explicit policy that performs well
in the environment. We provide an algorithm called RLAdvice, based
on the imitation learning algorithm DAgger. We illustrate the
effectiveness of this method in the Arcade Learning Environment on
three different games, using value estimates from UCT as advice.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(100%)",
znote = "Acceptance rate: 25/80 = 31\%",
}
@InProceedings{Hutter:14reflect,
author = "Di Yang and Srimal Jayawardena and Stephen Gould and Marcus Hutter",
title = "Reflective Features Detection and Hierarchical Reflections Separation in Image Sequences",
booktitle = "Proc. 16th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'14})",
pages = "1--7",
_editor = "S.L. Phung and A. Bouzerdoum and P. Ogunbona and W. Li and L. Wang",
publisher = "IEEE Xplore",
address = "Wollongong, Australia",
_month = nov,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#reflect",
pdf = "http://www.hutter1.net/publ/reflect.pdf",
slides = "http://www.hutter1.net/publ/sreflect.pdf",
project = "http://www.hutter1.net/official/projects.htm#icar",
doi = "10.1109/DICTA.2014.7008127",
isbn = "978-1-4799-5409-4",
keywords = "computer vision; reflection detection; support vector machine; automatic.",
abstract = "Computer vision techniques such as Structure-from-Motion (SfM) and
object recognition tend to fail on scenes with highly reflective
objects because the reflections behave differently to the true
geometry of the scene. Such image sequences may be treated as two
layers superimposed over each other -- the nonreflection scene
source layer and the reflection layer. However, decomposing the two
layers is a very challenging task as it is ill-posed and common
methods rely on prior information. This work presents an automated
technique for detecting reflective features with a comprehensive
analysis of the intrinsic, spatial, and temporal properties of
feature points. A support vector machine (SVM) is proposed to learn
reflection feature points. Predicted reflection feature points are
used as priors to guide the reflection layer separation. This gives
more robust and reliable results than what is achieved by
performing layer separation alone.",
support = "ControlExpert GmbH",
for = "080104(50%),080106(50%)",
}
@Article{Hutter:14pacmdpx,
author = "Tor Lattimore and Marcus Hutter",
title = "Near-Optimal {PAC} bounds for discounted {MDP}s",
journal = "Theoretical Computer Science",
volume = "558",
pages = "125--143",
publisher = "Elsevier",
_month = nov,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#pacmdpx",
pdf = "http://www.hutter1.net/publ/pacmdpx.pdf",
slides = "http://www.hutter1.net/publ/spacmdp.pdf",
project = "http://www.hutter1.net/official/projects.htm#agents",
issn = "0304-3975",
doi = "10.1016/j.tcs.2014.09.029",
keywords = "Sample-complexity; PAC bounds; Markov decision processes; Reinforcement learning",
abstract = "We study upper and lower bounds on the sample-complexity of
learning near-optimal behaviour in finite-state discounted Markov
Decision Processes (MDPs). We prove a new bound for a modified
version of Upper Confidence Reinforcement Learning (UCRL) with only
cubic dependence on the horizon. The bound is unimprovable in all
parameters except the size of the state/action space, where it
depends linearly on the number of non-zero transition
probabilities. The lower bound strengthens previous work by being
both more general (it applies to all policies) and tighter. The
upper and lower bounds match up to logarithmic factors provided the
transition matrix is not too dense.",
support = "ARC grant DP120100950",
for = "010404(30%),010405(30%),080198(40%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:14ktoptdif,
author = "Tansu Alpcan and Tom Everitt and Marcus Hutter",
title = "Can we Measure the Difficulty of an Optimization Problem?",
booktitle = "{IEEE} Information Theory Workshop",
pages = "356--360",
_editor = "Yi Hong and Jamie Evans and Emanuele Viterbo and Urbashi Mitra",
publisher = "IEEE Press",
address = "Hobart, Australia",
_month = nov,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#ktoptdif",
pdf = "http://www.hutter1.net/publ/ktoptdif.pdf",
slides = "http://www.hutter1.net/publ/sktoptdif.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
issn = "1662-9019",
isbn = "978-1-4799-5998-0",
doi = "10.1109/ITW.2014.6970853",
keywords = "open box optimization; problem complexity; algorithmic information theory",
abstract = "Can we measure the difficulty of an optimization
problem? Although optimization plays a crucial role in modern
science and technology, a formal framework that puts problems
and solution algorithms into a broader context has not been
established. This paper presents a conceptual approach which
gives a positive answer to the question for a broad class of
optimization problems. Adopting an information and computational
perspective, the proposed framework builds upon Shannon and
algorithmic information theories. As a starting point, a concrete
model and definition of optimization problems is provided. Then,
a formal definition of optimization difficulty is introduced which
builds upon algorithmic information theory. Following an initial
analysis, lower and upper bounds on optimization difficulty
are established. One of the upper-bounds is closely related to
Shannon information theory and black-box optimization. Finally,
various computational issues and future research directions are
discussed.",
for = "080401(70%),080198(30%)",
seo = "970801(100%)",
}
@InProceedings{Hutter:14epipolar,
author = "S. Jayawardena and S. Gould and H. Li and M. Hutter and R. Hartley",
title = "Reliable Point Correspondences in Scenes Dominated by Highly Reflective and Largely Homogeneous Surfaces",
booktitle = "Proc. 12th Asian Conf. on Computer Vision -- Workshop ({RoLoD@ACCV'14}) Part I",
address = "Singapore",
series = "LNCS",
volume = "9008",
pages = "659--674",
_editor = "C. V. Jawahar and Shiguang Shan",
publisher = "Springer",
_month = nov,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#epipolar",
pdf = "http://www.hutter1.net/publ/epipolar.pdf",
slides = "http://www.hutter1.net/publ/sepipolar.pdf",
project = "http://www.hutter1.net/official/projects.htm#icar",
issn = "0302-9743",
isbn = "978-3-319-16627-8",
doi = "10.1007/978-3-319-16628-5_47",
keywords = "point correspondences; reflections; homogeneous; texture impoverished;
epipolar geometry; fundamental matrix; structure from motion; noisy.",
abstract = "Common Structure from Motion (SfM) tasks require reliable point
correspondences in images taken from different views to
subsequently estimate model parameters which describe the 3D scene
geometry. For example when estimating the fundamental matrix from
point correspondences using RANSAC. The amount of noise in the
point correspondences drastically affect the estimation algorithm
and the number of iterations needed for convergence grows
exponentially with the level of noise. In scenes dominated by
highly reflective and largely homogeneous surfaces such as vehicle
panels and buildings with a lot of glass, existing approaches give
a very high proportion of spurious point correspondences. As a
result the number of iterations required for subsequent model
estimation algorithms become intractable. We propose a novel method
that uses descriptors evaluated along points in image edges to
obtain a sufficiently high proportion of correct point
correspondences. We show experimentally that our method gives
better results in recovering the epipolar geometry in scenes
dominated by highly reflective and homogeneous surfaces compared to
common baseline methods on stereo images taken from considerably
wide baselines.",
support = "ControlExpert GmbH",
for = "080106(100%)",
seo = "970108(80%),890205(20%)",
znote = "Acceptance rate: 153/307 = 50\%",
}
@InProceedings{Hutter:14martosc,
author = "Jan Leike and Marcus Hutter",
title = "Indefinitely Oscillating Martingales",
booktitle = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
address = "Bled, Slovenia",
series = "LNAI",
volume = "8776",
_editor = "Peter Auer and Alexander Clark",
publisher = "Springer",
pages = "321--335",
_month = oct,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#martosc",
url = "http://arxiv.org/abs/1408.3169",
pdf = "http://www.hutter1.net/publ/martosc.pdf",
latex = "http://www.hutter1.net/publ/martosc.tex",
slides = "http://www.hutter1.net/publ/smartosc.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
issn = "0302-9743",
isbn = "978-3-319-11661-7",
doi = "10.1007/978-3-319-11662-4_23",
keywords = "martingales, infinite oscillations, bounds, convergence
rates, minimum description length, mind changes.",
abstract = "We construct a class of nonnegative martingale processes
that oscillate indefinitely with high probability. For these processes,
we state a uniform rate of the number of oscillations for a given magnitude
and show that this rate is asymptotically close to the theoretical upper
bound. These bounds on probability and expectation of the number of
upcrossings are compared to classical bounds from the martingale literature.
We discuss two applications. First, our results imply that the
limit of the minimum description length operator may not exist. Second,
we give bounds on how often one can change one's belief in a given
hypothesis when observing a stream of data.",
for = "010405(60%),080101(40%)",
seo = "970101(60%),970108(40%)",
znote = "Acceptance rate: 21/50 = 42\%",
}
@InProceedings{Hutter:14off2on,
author = "Marcus Hutter",
title = "Offline to Online Conversion",
booktitle = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
address = "Bled, Slovenia",
series = "LNAI",
volume = "8776",
_editor = "Peter Auer and Alexander Clark",
publisher = "Springer",
pages = "230--244",
_month = oct,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#off2on",
url = "http://arxiv.org/abs/1407.3334",
pdf = "http://www.hutter1.net/publ/off2on.pdf",
latex = "http://www.hutter1.net/publ/off2on.tex",
slides = "http://www.hutter1.net/publ/soff2on.pdf",
project = "http://www.hutter1.net/official/projects.htm#infoth",
issn = "0302-9743",
isbn = "978-3-319-11661-7",
doi = "10.1007/978-3-319-11662-4_17",
keywords = "offline; online; batch; sequential; probability; estimation;
prediction; time-consistency; normalization; tractable; regret;
combinatorics; Bayes; Laplace; Ristad; Good-Turing.",
abstract = "We consider the problem of converting offline estimators into an
online predictor or estimator with small extra regret. Formally
this is the problem of merging a collection of probability
measures over strings of length 1,2,3,... into a single
probability measure over infinite sequences. We describe various
approaches and their pros and cons on various examples. As a
side-result we give an elementary non-heuristic purely
combinatoric derivation of Turing's famous estimator. Our main
technical contribution is to determine the computational
complexity of online estimators with good guarantees in general.",
for = "080401(30%),080201(30%),010405(40%)",
seo = "970108(100%)",
znote = "Acceptance rate: 21/50 = 42\%",
}
@InProceedings{Hutter:14exsagg,
author = "Marcus Hutter",
title = "Extreme State Aggregation beyond {MDP}s",
booktitle = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
address = "Bled, Slovenia",
series = "LNAI",
volume = "8776",
_editor = "Peter Auer and Alexander Clark",
publisher = "Springer",
pages = "185--199",
_month = oct,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#exsagg",
url = "http://arxiv.org/abs/1407.3341",
pdf = "http://www.hutter1.net/publ/exsagg.pdf",
latex = "http://www.hutter1.net/publ/exsagg.tex",
slides = "http://www.hutter1.net/publ/sexsagg.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
issn = "0302-9743",
isbn = "978-3-319-11661-7",
doi = "10.1007/978-3-319-11662-4_14",
keywords = "state aggregation, reinforcement learning, non-MDP.",
abstract = "We consider a Reinforcement Learning setup without any (esp.\ MDP)
assumptions on the environment. State aggregation and more
generally feature reinforcement learning is concerned with mapping
histories/raw-states to reduced/aggregated states. The idea behind
both is that the resulting reduced process (approximately) forms a
small stationary finite-state MDP, which can then be efficiently
solved or learnt. We considerably generalize existing aggregation
results by showing that even if the reduced process is not an MDP,
the (q-)value functions and (optimal) policies of an associated MDP
with same state-space size solve the original problem, as long as
the solution can approximately be represented as a function of the
reduced states. This implies an upper bound on the required state
space size that holds uniformly for all RL problems. It may also
explain why RL algorithms designed for MDPs sometimes perform well
beyond MDPs.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(100%)",
znote = "Acceptance rate: 21/50 = 42\%",
}
@InProceedings{Hutter:14pacbayes,
author = "Tor Lattimore and Marcus Hutter",
title = "Bayesian Reinforcement Learning with Exploration",
booktitle = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
address = "Bled, Slovenia",
series = "LNAI",
volume = "8776",
_editor = "Peter Auer and Alexander Clark",
publisher = "Springer",
pages = "170--184",
_month = oct,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#pacbayes",
pdf = "http://www.hutter1.net/publ/pacbayes.pdf",
slides = "http://www.hutter1.net/publ/spacbayes.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
issn = "0302-9743",
isbn = "978-3-319-11661-7",
doi = "10.1007/978-3-319-11662-4_13",
keywords = "reinforcement learning; sample complexity; Bayes-optimal; exploration; PAC bounds",
abstract = "We consider a general reinforcement learning problem and show that
carefully combining the Bayesian optimal policy and an exploring
policy leads to minimax sample-complexity bounds in a very general
class of (history-based) environments. We also prove lower bounds
and show that the new algorithm displays adaptive behaviour when
the environment is easier than worst-case.",
support = "ARC grant DP120100950",
for = "080101(80%),010404(40%)",
seo = "970108(100%)",
znote = "Acceptance rate: 21/50 = 42\%",
}
@InProceedings{Hutter:14learnutm,
author = "Peter Sunehag and Marcus Hutter",
title = "Intelligence as Inference or Forcing {O}ccam on the World",
booktitle = "Proc. 7th Conf. on Artificial General Intelligence ({AGI'14})",
series = "LNAI",
volume = "8598",
pages = "186--195",
_editor = "Ben Goertzel and Laurent Orseau and Javier Snaider",
publisher = "Springer",
address = "Quebec City, Canada",
_month = aug,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#learnutm",
pdf = "http://www.hutter1.net/publ/learnutm.pdf",
slides = "http://www.hutter1.net/publ/slearnutm.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-319-09274-4_18",
issn = "0302-9743",
isbn = "978-3-319-09273-7",
keywords = "Ockham; Universal; Intelligence; Learning; Turing Machine;
Expectation Maximization; Evolution; Reasoning; Agents; Reward.",
abstract = "We propose to perform the optimization task of Universal Artificial
Intelligence (UAI) through learning a reference machine on which
good programs are short. Further, we also acknowledge that the
choice of reference machine that the UAI objective is based on is
arbitrary and, therefore, we learn a suitable machine for the
environment we are in. This is based on viewing Occam's razor as an
imperative instead of as a proposition about the world. Since this
principle cannot be true for all reference machines, we need to
find a machine that makes the principle true. We both want good
policies and the environment to have short implementations on the
machine. Such a machine is learnt iteratively through a procedure
that generalizes the principle underlying the
Expectation-Maximization algorithm.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(80%),970122(20%)",
znote = "Acceptance rate: 22/65 = 34\%.",
}
@InProceedings{Hutter:14optcog,
author = "Peter Sunehag and Marcus Hutter",
title = "A Dual Process Theory of Optimistic Cognition",
booktitle = "Proc. 36th Annual Meeting of the Cognitive Science Society ({CogSci'14})",
pages = "2949--2954",
_editor = "Paul Bello and Marcello Guarini and Marjorie McShane and Brian Scassellati",
publisher = "Curran Associates",
address = "Quebec City, Canada",
_month = jul,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#optcog",
http = "http://mindmodeling.org/cogsci2014/papers/509/paper509.pdf",
pdf = "http://www.hutter1.net/publ/optcog.pdf",
slides = "http://www.hutter1.net/publ/soptcog.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
isbn = "978-1-63439-116-0",
keywords = "Rationality, Optimism, Optimality, Reinforcement Learning",
abstract = "Optimism is a prevalent bias in human cognition including
variations like self-serving beliefs, illusions of control and
overly positive views of one's own future. Further, optimism
has been linked with both success and happiness. In fact, it has
been described as a part of human mental well-being which has
otherwise been assumed to be about being connected to reality.
In reality, only people suffering from depression are realistic.
Here we study a formalization of optimism within a dual process
framework and study its usefulness beyond human needs
in a way that also applies to artificial reinforcement learning
agents. Optimism enables systematic exploration which is essential
in an (partially) unknown world. The key property of
an optimistic hypothesis is that if it is not contradicted when
one acts greedily with respect to it, then one is well rewarded
even if it is wrong.",
support = "ARC grant DP120100950",
for = "080101(50%),170202(50%)",
seo = "970108(70%),970117(30%)",
}
@InProceedings{Hutter:14frlabs,
author = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
title = "Feature Reinforcement Learning: State of the Art",
booktitle = "Proc. Workshops at the 28th {AAAI} Conference on Artificial Intelligence:
Sequential Decision Making with Big Data",
pages = "2--5",
_editor = "Amir-Massoud Farahmand et al.",
publisher = "AAAI Press",
address = "Quebec City, Canada",
_month = jul,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#frlabs",
http = "http://www.aaai.org/ocs/index.php/WS/AAAIW14/paper/view/8791",
pdf = "http://www.hutter1.net/publ/frlabs.pdf",
slides = "http://www.hutter1.net/publ/sfrlabs.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
keywords = "Reinforcement learning; temporal difference learning;
partial observability; Q-learning; feature learning;
function approximation; rational agents.",
abstract = "Feature reinforcement learning was introduced five years ago
as a principled and practical approach to history-based learning.
This paper examines the progress since its inception. We
now have both model-based and model-free cost functions,
most recently extended to the function approximation setting.
Our current work is geared towards playing ATARI games
using imitation learning, where we use Feature RL as a feature
selection method for high-dimensional domains.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(100%)",
znote = "http://sites.google.com/site/decisionmakingbigdata/",
}
@InProceedings{Hutter:14floud,
author = "Tom Everitt and Tor Lattimore and Marcus Hutter",
title = "Free Lunch for Optimisation under the Universal Distribution",
booktitle = "Proc. 2014 Congress on Evolutionary Computation ({CEC'14})",
pages = "167--174",
_editor = "Derong Liu and Jennie Si",
publisher = "IEEE",
address = "Beijing, China",
_month = jul,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#floud",
url = "http://arxiv.org/abs/1608.04544",
pdf = "http://www.hutter1.net/publ/floud.pdf",
slides = "http://www.hutter1.net/publ/sfloud.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
isbn = "978-1-4799-6626-4",
doi = "10.1109/CEC.2014.6900546",
keywords = "function optimization; universal prior; Occam's Razor; No Free Lunch.",
abstract = "Function optimisation is a major challenge in computer science. The
No Free Lunch theorems state that if all functions with the same
histogram are assumed to be equally probable then no algorithm
outperforms any other in expectation. We argue against the uniform
assumption and suggest a universal prior exists for which there is
a free lunch, but where no particular class of functions is
favoured over another. We also prove upper and lower bounds on the
size of the free lunch.",
for = "080199(70%),010404(30%)",
seo = "970108(100%)",
}
@Article{Hutter:14tcdiscx,
author = "Tor Lattimore and Marcus Hutter",
title = "General Time Consistent Discounting",
journal = "Theoretical Computer Science",
volume = "519",
pages = "140--154",
publisher = "Elsevier",
_month = jan,
year = "2014",
bibtex = "http://www.hutter1.net/official/bib.htm#tcdiscx",
pdf = "http://www.hutter1.net/publ/tcdiscx.pdf",
slides = "http://www.hutter1.net/publ/stcdisc.pdf",
project = "http://www.hutter1.net/official/projects.htm#agents",
issn = "0304-3975",
doi = "10.1016/j.tcs.2013.09.022",
keywords = "Rational agents; sequential decision theory;
general discounting; time-consistency; game theory.",
abstract = "Modeling inter-temporal choice is a key problem in both computer
science and economic theory. The discounted utility model of
Samuelson is currently the most popular model for measuring the
global utility of a time-series of local utilities. The model is
limited by not allowing the discount function to change with the
age of the agent. This is despite the fact that many agents, in
particular humans, are best modelled with age-dependent discount
functions. It is well known that discounting can lead to
time-inconsistent behaviour where agents change their preferences
over time. In this paper we generalise the discounted utility model
to allow age-dependent discount functions. We then extend previous
work in time-inconsistency to our new setting, including a complete
characterisation of time-(in)consistent discount functions, the
existence of sub-game perfect equilibrium policies where the
discount function is time-inconsistent and a continuity result
showing that ``nearly'' time-consistent discount rates lead to
``nearly'' time-consistent behaviour.",
for = "010405(20%),080101(40%),140104(20%),170202(20%)",
seo = "970108(40%),970114(30%),970117(30%)",
}
%-------------Publications-of-Marcus-Hutter-2013--------------%
@Article{Hutter:13uai4lay,
author = "Marcus Hutter",
title = "To Create a Super-Intelligent Machine, Start with an Equation",
journal = "The Conversation",
volume = "November",
number = "29",
pages = "1--5",
_month = nov,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#uai4lay",
url = "http://theconversation.com/to-create-a-super-intelligent-machine-start-with-an-equation-20756",
pdf = "http://www.hutter1.net/publ/uai4lay.pdf",
slides = "http://www.hutter1.net/publ/suai4lay.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
keywords = "intelligence; mathematics; learning; planning; rational agents; foundations.",
abstract = "Intelligence is a very difficult concept and, until recently, no
one has succeeded in giving it a satisfactory formal definition.
Most researchers have given up grappling with the notion of
intelligence in full generality, and instead focus on related but
more limited concepts -- but I argue that mathematically defining
intelligence is not only possible, but crucial to understanding and
developing super-intelligent machines. From this, my research group
has even successfully developed software that can learn to play
Pac-Man from scratch.",
for = "080401(20%),080101(30%),080199(30%),220399(20%)",
seo = "970108(80%),970122(20%)",
znote = "Top 10 of 700+ ANU articles till 2013. 25'000+ views.
http://theconversation.com/metrics/institutions/australian-national-university/article_leaderboard",
}
@InProceedings{Hutter:13rlqh,
author = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
title = "Q-Learning for History-Based Reinforcement Learning",
booktitle = "Proc. 5th Asian Conf. on Machine Learning ({ACML'13})",
volume = "29",
pages = "213--228",
_editor = "Tu Bao Ho and Cheng Soon Ong",
publisher = "JMLR",
address = "Canberra, Australia",
_month = nov,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#rlqh",
http = "http://proceedings.mlr.press/v29/Daswani13.html",
pdf = "http://www.hutter1.net/publ/rlqh.pdf",
slides = "http://www.hutter1.net/publ/srlqh.pdf",
poster = "http://www.hutter1.net/publ/prlqh.pdf",
project = "http://www.hutter1.net/official/projects.htm#frl",
issn = "1532-4435",
_doi = "http://jmlr.org/proceedings/papers/v29/Daswani13.pdf",
keywords = "feature reinforcement learning; temporal difference learning;
Markov decision process; partial observability; Q-learning;
Monte Carlo search; Pocman; rational agents.",
abstract = "We extend the Q-learning algorithm from the Markov Decision Process
setting to problems where observations are non-Markov and do not
reveal the full state of the world i.e. to POMDPs. We do this in a
natural manner by adding l0 regularisation to the pathwise squared
Q-learning objective function and then optimise this over both a
choice of map from history to states and the resulting MDP
parameters. The optimisation procedure involves a stochastic search
over the map class nested with classical Q-learning of the
parameters. This algorithm fits perfectly into the feature
reinforcement learning framework, which chooses maps based on a
cost criteria. The cost criterion used so far for feature
reinforcement learning has been model-based and aimed at predicting
future states and rewards. Instead we directly predict the return,
which is what is needed for choosing optimal actions. Our
Q-learning criteria also lends itself immediately to a function
approximation setting where features are chosen based on the
history. This algorithm is somewhat similar to the recent line of
work on lasso temporal difference learning which aims at finding a
small feature set with which one can perform policy evaluation. The
distinction is that we aim directly for learning the Q-function of
the optimal policy and we use l0 instead of l1 regularisation. We
perform an experimental evaluation on classical benchmark domains
and find improvement in convergence speed as well as in economy of
the state representation. We also compare against MC-AIXI on the
large Pocman domain and achieve competitive performance in average
reward. We use less than half the CPU time and 36 times less
memory. Overall, our algorithm hQL provides a better combination of
computational, memory and data efficiency than existing algorithms in
this setting.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(100%)",
znote = "long presentation: Acceptance rate: 13/103 = 13\%",
}
@Article{Hutter:13problogic,
author = "Marcus Hutter and John W. Lloyd and Kee Siong Ng and William T.B. Uther",
title = "Probabilities on Sentences in an Expressive Logic",
journal = "Journal of Applied Logic",
volume = "11",
pages = "386--420",
_publisher = "Elsevier",
_month = nov,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#problogic",
url = "http://arxiv.org/abs/1209.2620",
pdf = "http://www.hutter1.net/publ/problogic.pdf",
latex = "http://www.hutter1.net/publ/problogic.tex",
slides = "http://www.hutter1.net/publ/sproblogic.pdf",
video = "http://www.youtube.com/watch?v=WEkZSHcRsAM",
project = "http://www.hutter1.net/official/projects.htm#logic",
doi = "10.1016/j.jal.2013.03.003",
issn = "1570-8683",
keywords = "higher-order logic; probability on sentences;
Gaifman; Cournot; Bayes; induction; confirmation;
learning; prior; knowledge; entropy.",
abstract = "Automated reasoning about uncertain knowledge has many applications.
One difficulty when developing such systems is the lack
of a completely satisfactory integration of logic and probability.
We address this problem directly.
Expressive languages like higher-order logic are ideally suited
for representing and reasoning about structured knowledge.
Uncertain knowledge can be modeled by using graded probabilities
rather than binary truth-values.
The main technical problem studied in this paper is the following:
Given a set of sentences, each having some probability of being true,
what probability should be ascribed to other (query) sentences?
A natural wish-list, among others, is that the probability distribution
(i) is consistent with the knowledge base,
(ii) allows for a consistent inference procedure and in particular
(iii) reduces to deductive logic in the limit of probabilities being 0 and 1,
(iv) allows (Bayesian) inductive reasoning and
(v) learning in the limit and in particular
(vi) allows confirmation of universally quantified hypotheses/sentences.
We translate this wish-list into technical requirements for a prior probability
and show that probabilities satisfying all our criteria exist.
We also give explicit constructions and several general
characterizations of probabilities that satisfy some or all of
the criteria and various (counter) examples.
We also derive necessary and sufficient conditions for
extending beliefs about finitely many sentences to suitable
probabilities over all sentences,
and in particular least dogmatic or least biased ones.
We conclude with a brief outlook on how the developed theory might
be used and approximated in autonomous reasoning agents.
Our theory is a step towards a globally consistent and
empirically satisfactory unification of probability and logic.",
support = "ARC grant DP0877635",
for = "080203(50%),010404(30%),080401(10%),080101(10%)",
seo = "970108(80%),970101(20%)",
znote = "Presented at Progic 2011: http://sites.google.com/site/progicconference2011/
and at WL4AI@IJCAI 2013: http://ijcai13.org/program/workshop/32",
}
@InProceedings{Hutter:13ksaprob,
author = "Laurent Orseau and Tor Lattimore and Marcus Hutter",
title = "Universal Knowledge-Seeking Agents for Stochastic Environments",
booktitle = "Proc. 24th International Conf. on Algorithmic Learning Theory ({ALT'13})",
address = "Singapore",
series = "LNAI",
volume = "8139",
_editor = "S. Jain and R. Munos and F. Stephan and Th. Zeugmann",
publisher = "Springer",
pages = "158--172",
_month = oct,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#ksaprob",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT13/",
pdf = "http://www.hutter1.net/publ/ksaprob.pdf",
slides = "http://www.hutter1.net/publ/sksaprob.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-40935-6_12",
issn = "0302-9743",
isbn = "978-3-642-40934-9",
keywords = "Universal artificial intelligence; exploration; reinforcement learning;
algorithmic information theory; Solomonoff induction.",
abstract = "We define an optimal Bayesian knowledge-seeking agent, KL-KSA,
designed for countable hypothesis classes of stochastic
environments and whose goal is to gather as much information about
the unknown world as possible. Although this agent works for
arbitrary countable classes and priors, we focus on the especially
interesting case where all stochastic computable environments are
considered and the prior is based on Solomonoff's universal prior.
Among other properties, we show that KL-KSA learns the true
environment in the sense that it learns to predict the consequences
of actions it does not take. We show that it does not consider
noise to be information and avoids taking actions leading to
inescapable traps. We also present a variety of toy experiments
demonstrating that KL-KSA behaves according to expectation.",
for = "080101(50%),080199(50%)",
seo = "970108(100%)",
znote = "Acceptance rate: 23/39 = 59\%",
}
@InProceedings{Hutter:13ccbayessp,
author = "Tor Lattimore and Marcus Hutter and Peter Sunehag",
title = "Concentration and Confidence for Discrete Bayesian Sequence Predictors",
booktitle = "Proc. 24th International Conf. on Algorithmic Learning Theory ({ALT'13})",
address = "Singapore",
series = "LNAI",
volume = "8139",
_editor = "S. Jain and R. Munos and F. Stephan and Th. Zeugmann",
publisher = "Springer",
pages = "324--338",
_month = oct,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#ccbayessp",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT13/",
url = "http://arxiv.org/abs/1307.0127",
pdf = "http://www.hutter1.net/publ/ccbayessp.pdf",
slides = "http://www.hutter1.net/publ/sccbayessp.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
doi = "10.1007/978-3-642-40935-6_23",
issn = "0302-9743",
isbn = "978-3-642-40934-9",
keywords = "Bayesian sequence prediction; concentration of measure;
information theory; KWIK learning.",
abstract = "Bayesian sequence prediction is a simple technique for predicting
future symbols sampled from an unknown measure on infinite
sequences over a countable alphabet. While strong bounds on the
expected cumulative error are known, there are only limited results
on the distribution of this error. We prove tight high-probability
bounds on the cumulative error, which is measured in terms of the
Kullback-Leibler (KL) divergence. We also consider the problem of
constructing upper confidence bounds on the KL and Hellinger errors
similar to those constructed from Hoeffding-like bounds in the
i.i.d. case. The new results are applied to show that Bayesian
sequence prediction can be used in the Knows What It Knows (KWIK)
framework with bounds that match the state-of-the-art.",
support = "ARC grant DP120100950",
for = "010405(70%),010404(30%)",
seo = "970101(100%)",
znote = "Acceptance rate: 23/39 = 59\%",
}
@Proceedings{Hutter:13ewrlabs,
editor = "Peter Auer and Marcus Hutter and Laurent Orseau",
title = "Reinforcement Learning",
subtitle = "Dagstuhl Seminar 13321 ({EWRL'13})",
publisher = "Schloss Dagstuhl -- Leibniz-Zentrum fuer Informatik",
address = "Dagstuhl, Germany",
volume = "3",
number = "8",
_month = aug,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#ewrlabs13",
url = "http://drops.dagstuhl.de/opus/volltexte/2013/4340/",
pdf = "http://www.hutter1.net/publ/ewrlabs13.pdf",
project = "http://www.hutter1.net/official/projects.htm#other",
issn = "2192-5283",
doi = "10.4230/DagRep.3.8.1",
keywords = "Machine Learning, Reinforcement Learning, Markov Decision Processes, Planning",
abstract = "This Dagstuhl Seminar also stood as the 11th European Workshop on
Reinforcement Learning (EWRL11). Reinforcement learning gains more
and more attention each year, as can be seen at the various
conferences (ECML, ICML, IJCAI, ...). EWRL, and in particular this
Dagstuhl Seminar, aimed at gathering people interested in
reinforcement learning from all around the globe. This unusual
format for EWRL helped viewing the field and discussing topics
differently.",
for = "080101(50%),080198(50%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:13problogics,
author = "Marcus Hutter and John W. Lloyd and Kee Siong Ng and William T.B. Uther",
title = "Unifying Probability and Logic for Learning",
booktitle = "Proc. 2nd Workshop on Weighted Logics for AI ({WL4AI'13})",
_volume = "2",
pages = "65--72",
_editor = "Lluis Godo and Henri Prade and Guilin Qi",
publisher = "",
address = "Beijing, China",
_month = aug,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#problogics",
url = "http://ijcai13.org/program/workshop/32",
http = "http://www.iiia.csic.es/wl4ai-2013/working_papers",
pdf = "http://www.hutter1.net/publ/problogics.pdf",
slides = "http://www.hutter1.net/publ/sproblogic.pdf",
video = "http://www.youtube.com/watch?v=WEkZSHcRsAM",
project = "http://www.hutter1.net/official/projects.htm#logic",
conf = "http://www.iiia.csic.es/wl4ai-2013/",
keywords = "higher-order logic; probability on sentences;
Gaifman; Cournot; Bayes; induction; confirmation;
learning; prior; knowledge; entropy.",
abstract = "Uncertain knowledge can be modeled by using graded probabilities
rather than binary truth-values, but so far a completely satisfactory
integration of logic and probability has been lacking.
In particular the inability of confirming universal hypotheses
has plagued most if not all systems so far.
We address this problem head on.
The main technical problem to be discussed is the following:
Given a set of sentences, each having some probability of being true,
what probability should be ascribed to other (query) sentences?
A natural wish-list, among others, is that the probability distribution
(i) is consistent with the knowledge base,
(ii) allows for a consistent inference procedure and in particular
(iii) reduces to deductive logic in the limit of probabilities being 0 and 1,
(iv) allows (Bayesian) inductive reasoning and
(v) learning in the limit and in particular
(vi) allows confirmation of universally quantified hypotheses/sentences.
We show that probabilities satisfying (i)-(vi) exist,
and present necessary and sufficient conditions (Gaifman and Cournot).
The theory is a step towards a globally consistent and
empirically satisfactory unification of probability and logic.",
support = "ARC grant DP0877635",
for = "080203(50%),010404(30%),080401(10%),080101(10%)",
seo = "970108(80%),970101(20%)",
znote = "Only appears on WS/IJCAI website",
}
@InProceedings{Hutter:13agscilaws,
author = "Peter Sunehag and Marcus Hutter",
title = "Learning Agents with Evolving Hypothesis Classes",
booktitle = "Proc. 6th Conf. on Artificial General Intelligence ({AGI'13})",
series = "LNAI",
volume = "7999",
pages = "150--159",
_editor = "Kai-Uwe Kuehnberger and Sebastian Rudolph and Pei Wang",
publisher = "Springer, Heidelberg",
_address = "Beijing, China",
_month = jul,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#agscilaws",
pdf = "http://www.hutter1.net/publ/agscilaws.pdf",
slides = "http://www.hutter1.net/publ/sagscilaws.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-39521-5_16",
issn = "0302-9743",
isbn = "978-3-642-39520-8",
abstract = "It has recently been shown that a Bayesian agent with a universal
hypothesis class resolves most induction problems discussed in the
philosophy of science. These ideal agents are, however, neither practical
nor a good model for how real science works. We here introduce a framework
for learning based on implicit beliefs over all possible hypotheses
and limited sets of explicit theories sampled from an implicit distribution
represented only by the process by which it generates new hypotheses.
We address the questions of how to act based on a limited set of theories
as well as what an ideal sampling process should be like. Finally,
we discuss topics in philosophy of science and cognitive science from the
perspective of this framework.",
support = "ARC grant DP120100950",
for = "080101(100%)",
seo = "970108(80%),970122(20%)",
znote = "Acceptance rate: 20/42 = 48\%.",
}
@Article{Hutter:13pacgrl,
author = "Tor Lattimore and Marcus Hutter and Peter Sunehag",
title = "The Sample-Complexity of General Reinforcement Learning",
journal = "Journal of Machine Learning Research, W\&CP: ICML",
volume = "28",
number = "3",
pages = "28--36",
_editor = "S. Dasgupta and D. McAllester",
publisher = "",
_address = "Atlanta, Georgia, USA",
_month = jun,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#pacgrl",
http = "http://jmlr.org/proceedings/papers/v28/lattimore13.html",
url = "http://arxiv.org/abs/1308.4828",
pdf = "http://www.hutter1.net/publ/pacgrl.pdf",
latex = "http://www.hutter1.net/publ/pacgrl.tex",
slides = "http://www.hutter1.net/publ/spacgrl.pdf",
project = "http://www.hutter1.net/official/projects.htm#agent",
issn = "1532-4435",
keywords = "reinforcement learning; sample complexity; PAC bounds",
abstract = "We present a new algorithm for general reinforcement learning where
the true environment is known to belong to a finite class of N
arbitrary models. The algorithm is shown to be near-optimal for all
but O(N log^2 N) timesteps with high probability. Infinite classes
are also considered where we show that compactness is a key
criterion for determining the existence of uniform
sample-complexity bounds. A matching lower bound is given for the
finite case.",
support = "ARC grant DP120100950",
for = "010405(50%),080199(50%)",
seo = "970108(100%)",
}
@Article{Hutter:13sad,
author = "Marcus Hutter",
title = "Sparse Adaptive {D}irichlet-Multinomial-like Processes",
journal = "Journal of Machine Learning Research, W\&CP: COLT",
volume = "30",
pages = "432--459",
_month = jun,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#sad",
url = "http://arxiv.org/abs/1305.3671",
pdf = "http://www.hutter1.net/publ/sad.pdf",
latex = "http://www.hutter1.net/publ/sad.tex",
slides = "http://www.hutter1.net/publ/ssad.pdf",
audio = "http://vmc.aarnet.edu.au/userdata/0b/0b4d5c6f-e775-4d48-8b47-32dc95d19b8b/ingest1685426376076922317.asf",
project = "http://www.hutter1.net/official/projects.htm#infoth",
issn = "1532-4435",
keywords = "sparse coding; adaptive parameters; Dirichlet-Multinomial;
Polya urn; data-dependent redundancy bound;
small/large alphabet; data compression.",
abstract = "Online estimation and modelling of i.i.d. data for short
sequences over large or complex ``alphabets'' is a ubiquitous
(sub)problem in machine learning, information theory, data
compression, statistical language processing, and document
analysis. The Dirichlet-Multinomial distribution (also called
Polya urn scheme) and extensions thereof are widely applied for
online i.i.d. estimation. Good a-priori choices for the
parameters in this regime are difficult to obtain though. I
derive an optimal adaptive choice for the main parameter via
tight, data-dependent redundancy bounds for a related model. The
1-line recommendation is to set the ``total mass'' = ``precision'' =
``concentration'' parameter to m/2ln[(n+1)/m], where n
is the (past) sample size and m the number of different symbols
observed (so far). The resulting estimator is simple, online,
fast, and experimental performance is superb.",
for = "080401(70%),010405(30%)",
seo = "970108(100%)",
znote = "26th Annual Conf. on Learning Theory. Acceptance rate: 47/98 = 48\%",
}
@InProceedings{Hutter:13mnonconv,
author = "Tor Lattimore and Marcus Hutter",
title = "On {M}artin-L{\"o}f Convergence of {S}olomonoff's Mixture",
booktitle = "Proc. 10th Annual Conference on Theory and
Applications of Models of Computation ({TAMC'13})",
volume = "7876",
pages = "212--223",
series = "LNCS",
_editor = "T-H.H. Chan and L.C. Lau and L. Trevisan",
publisher = "Springer",
address = "Hong Kong, China",
_month = may,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#mnonconv",
pdf = "http://www.hutter1.net/publ/mnonconv.pdf",
slides = "http://www.hutter1.net/publ/smnonconv.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
issn = "0302-9743",
isbn = "978-3-642-38235-2",
doi = "10.1007/978-3-642-38236-9_20",
keywords = "Solomonoff induction, Kolmogorov complexity, theory of computation.",
abstract = "We study the convergence of Solomonoff's universal mixture
on individual Martin-L{\"o}f random sequences. A new result is presented
extending the work of Hutter and Muchnik (2004) by showing that there
does not exist a universal mixture that converges on all Martin-L{\"o}f
random sequences.",
for = "080401(50%),010404(30%),010405(20%)",
seo = "970101(30%),970108(70%)",
znote = "Acceptance rate: 31/70 = 44\%",
}
@Article{Hutter:13alttcs,
author = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
title = "{ALT'10} Special Issue",
journal = "Theoretical Computer Science",
editor = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
volume = "473",
publisher = "Elsevier",
pages = "1--3/178",
_month = feb,
year = "2013",
bibtex = "http://www.hutter1.net/official/bib.htm#alt10tcs",
http = "http://www.sciencedirect.com/science/journal/03043975/473",
doi = "10.1016/j.tcs.2012.10.007",
issn = "0304-3975",
keywords = "algorithmic learning theory, special issue, preface",
abstract = "This special issue contains expanded versions of papers that appeared in
preliminary form in the proceedings of the 21st International Conference
on Algorithmic Learning Theory (ALT 2010), which was held in Canberra,
Australia during October 6--8, 2010. \emph{Algorithmic Learning Theory} is
a conference series which is dedicated to the theoretical study of the
algorithmic aspects of learning. The best papers of the conference ALT 2010
were invited for this special issue and after a thorough reviewing process,
most of them qualified for this Special Issue on Algorithmic Learning Theory
of Theoretical Computer Science. The preface contains a short introduction
to each of these papers.",
for = "080401(20%),010405(20%),080199(60%)",
seo = "970108(100%)",
}
%-------------Publications-of-Marcus-Hutter-2012--------------%
@Article{Hutter:12lstphi,
author = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
title = "Feature Reinforcement Learning using Looping Suffix Trees",
journal = "Journal of Machine Learning Research, W\&CP",
volume = "24",
pages = "11--23",
_month = dec,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#lstphi",
http = "http://proceedings.mlr.press/v24/daswani12a.html",
pdf = "http://www.hutter1.net/publ/lstphi.pdf",
latex = "http://www.hutter1.net/publ/lstphi.tex",
slides = "http://www.hutter1.net/publ/slstphi.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
issn = "1532-4435",
keywords = "looping suffix trees; Markov decision process;
reinforcement learning; partial observability;
information \& complexity; Monte Carlo search; rational agents.",
abstract = "There has recently been much interest in history-based methods
using suffix trees to solve POMDPs. However, these suffix trees
cannot efficiently represent environments that have long-term
dependencies. We extend the recently introduced CT$\Phi$MDP
algorithm to the space of looping suffix trees which have
previously only been used in solving deterministic POMDPs. The
resulting algorithm replicates results from CT$\Phi$MDP for
environments with short term dependencies, while it outperforms
LSTM-based methods on TMaze, a deep memory environment.",
support = "ARC grant DP120100950",
for = "080401(20%),010405(20%),080101(60%)",
seo = "970108(100%)",
}
@InProceedings{Hutter:12aixiens,
author = "Joel Veness and Peter Sunehag and Marcus Hutter",
title = "On Ensemble Techniques for {AIXI} Approximation",
booktitle = "Proc. 5th Conf. on Artificial General Intelligence ({AGI'12})",
series = "LNAI",
volume = "7716",
pages = "341--351",
_editor = "J. Bach and B. Goertzel and M. Ikle",
publisher = "Springer, Heidelberg",
_address = "Oxford, UK",
_month = dec,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#aixiens",
pdf = "http://www.hutter1.net/publ/aixiens.pdf",
slides = "http://www.hutter1.net/publ/saixiens.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-35506-6_35",
issn = "0302-9743",
isbn = "978-3-642-35505-9",
keywords = "Ensemble Techniques; AIXI; Universal Artificial Intelligence;
Agent Architectures; Perception and Perceptual Modeling.",
abstract = "One of the key challenges in AIXI approximation is model class
approximation - i.e. how to meaningfully approximate Solomonoff
Induction without requiring an infeasible amount of computation?
This paper advocates a bottom-up approach to this problem, by
describing a number of principled ensemble techniques for
approximate AIXI agents. Each technique works by efficiently
combining a set of existing environment models into a single, more
powerful model. These techniques have the potential to play an
important role in future AIXI approximations.",
support = "ARC grant DP120100950",
for = "080401(20%),010404(30%),080101(30%)",
seo = "970108(100%)",
znote = "Acceptance rate: 34/80 = 42\%.",
}
@InProceedings{Hutter:12aixiopt,
author = "Peter Sunehag and Marcus Hutter",
title = "Optimistic {AIXI}",
booktitle = "Proc. 5th Conf. on Artificial General Intelligence ({AGI'12})",
series = "LNAI",
volume = "7716",
pages = "312--321",
_editor = "J. Bach and B. Goertzel and M. Ikle",
publisher = "Springer, Heidelberg",
_address = "Oxford, UK",
_month = dec,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#aixiopt",
pdf = "http://www.hutter1.net/publ/aixiopt.pdf",
slides = "http://www.hutter1.net/publ/saixiopt.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-35506-6_32",
issn = "0302-9743",
isbn = "978-3-642-35505-9",
keywords = "AIXI; Reinforcement Learning; Optimism; Optimality; Agents; Prior Sets; Bets.",
abstract = "We consider extending the AIXI agent by using multiple (or even a
compact class of) priors. This has the benefit of weakening the
conditions on the true environment that we need to prove asymptotic
optimality. Furthermore, it decreases the arbitrariness of picking
the prior or reference machine. We connect this to removing symmetry
between accepting and rejecting bets in the rationality
axiomatization of AIXI and replacing it with optimism. Optimism is
often used to encourage exploration in the more restrictive Markov
Decision Process setting and it alleviates the problem that AIXI
(with geometric discounting) stops exploring prematurely.",
support = "ARC grant DP120100950",
for = "080101(70%),220302(30%)",
seo = "970108(100%)",
znote = "Acceptance rate: 34/80 = 42\%.",
}
@InProceedings{Hutter:12windowkt,
author = "Peter Sunehag and Wen Shao and Marcus Hutter",
title = "Coding of Non-Stationary Sources as a Foundation for Detecting Change Points and Outliers in Binary Time-Series",
booktitle = "Proc. 10th Australasian Data Mining Conference ({AusDM'12})",
volume = "134",
pages = "79--84",
_editor = "Yanchang Zhao and Jiuyong Li and Paul Kennedy and Peter Christen",
publisher = "Australian Computer Society",
address = "Sydney, Australia",
_month = dec,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#windowkt",
http = "http://crpit.com/abstracts/CRPITV134Sunehag.html",
pdf = "http://www.hutter1.net/publ/windowkt.pdf",
tex = "http://www.hutter1.net/publ/windowkt.tex",
slides = "http://www.hutter1.net/publ/swindowkt.pdf",
project = "http://www.hutter1.net/official/projects.htm#compress",
issn = "1445-1336",
isbn = "978-1-921770-14-2",
keywords = "non-stationary sources; time-series; change point detection; outlier; compression",
abstract = "An interesting scheme for estimating and adapting distributions in
real-time for non-stationary data has recently been the focus of
study for several different tasks relating to time series and data
mining, namely change point detection, outlier detection and online
compression/ sequence prediction. An appealing feature is that
unlike more sophisticated procedures, it is as fast as the related
stationary procedures which are simply modified through discounting
or windowing. The discount scheme makes older observations lose
their influence on new predictions. The authors of this article
recently used a discount scheme for introducing an adaptive version
of the Context Tree Weighting compression algorithm. The mentioned
change point and outlier detection methods rely on the changing
compression ratio of an online compression algorithm. Here we are
beginning to provide theoretical foundations for the use of these
adaptive estimation procedures that have already shown practical
promise.",
support = "ARC grant DP120100950",
for = "080401(100%)",
seo = "970108(100%)",
znote = "Acceptance rate: 25/55 = 45\%",
}
@InProceedings{Hutter:12optopt,
author = "Peter Sunehag and Marcus Hutter",
title = "Optimistic Agents are Asymptotically Optimal",
booktitle = "Proc. 25th Australasian Joint Conference on Artificial Intelligence ({AusAI'12})",
series = "LNAI",
volume = "7691",
pages = "15--26",
_editor = "Michael Thielscher and Dongmo Zhang",
publisher = "Springer",
address = "Sydney, Australia",
_month = dec,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#optopt",
url = "http://arxiv.org/abs/1210.0077",
pdf = "http://www.hutter1.net/publ/optopt.pdf",
latex = "http://www.hutter1.net/publ/optopt.tex",
slides = "http://www.hutter1.net/publ/soptopt.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "0302-9743",
isbn = "978-3-642-35100-6",
doi = "10.1007/978-3-642-35101-3_2",
keywords = "Reinforcement Learning; Optimism; Optimality; Agents; Uncertainty.",
abstract = "We use optimism to introduce generic asymptotically optimal
reinforcement learning agents. They achieve, with an arbitrary
finite or compact class of environments, asymptotically optimal
behavior. Furthermore, in the finite deterministic case we provide
finite error bounds.",
support = "ARC grant DP120100950",
for = "080101(70%),220302(30%)",
seo = "970108(100%)",
znote = "Acceptance rate: 76/196 = 39\%",
}
@InCollection{Hutter:12ctoe2,
author = "Marcus Hutter",
title = "The Subjective Computable Universe",
booktitle = "A Computable Universe: Understanding and Exploring Nature as Computation",
pages = "399--416",
_editor = "Hector Zenil",
publisher = "World Scientific",
_month = dec,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#ctoe2",
pdf = "http://www.hutter1.net/publ/ctoe2.pdf",
latex = "http://www.hutter1.net/publ/ctoe2.zip",
slides = "http://www.hutter1.net/publ/sctoe.pdf",
video = "http://pirsa.org/displayFlash.php?id=18040117",
http = "http://www.worldscientific.com/worldscibooks/10.1142/8306",
project = "http://www.hutter1.net/official/projects.htm#physics",
doi = "10.1142/9789814374309_0021",
isbn = "978-9-814-37429-3",
keywords = "world models; observer localization; computability;
predictive power; Ockham's razor; universal theories;
inductive reasoning; simplicity and complexity.",
abstract = "Nearly all theories developed for our world are computational.
The fundamental theories in physics can be used to emulate on a
computer ever more aspects of our universe. This and the
ubiquity of computers and virtual realities has increased the
acceptance of the computational paradigm. A computable theory
of everything seems to have come within reach. Given the
historic progression of theories from ego- to geo- to
helio-centric models to universe and multiverse theories, the
next natural step was to postulate a multiverse composed of all
computable universes. Unfortunately, rather than being a theory
of everything, the result is more a theory of nothing, which
actually plagues all too-large universe models in which
observers occupy random or remote locations. The problem can be
solved by incorporating the subjective observer process into
the theory. While the computational paradigm exposes a
fundamental problem of large-universe theories, it also
provides its solution.",
for = "080401(70%),020103(30%)",
seo = "970122(100%)",
}
@InProceedings{Hutter:12watershed,
author = "Di Yang and Stephen Gould and Marcus Hutter",
title = "A Noise Tolerant Watershed Transformation with Viscous Force for Seeded Image Segmentation",
booktitle = "Proc. 11th Asian Conf. on Computer Vision ({ACCV'12})",
address = "Daejeon, Korea",
series = "LNCS",
volume = "7724",
pages = "775--789",
_editor = "K. M. Lee and Y. Matsushita and J. M. Rehg and Z. Hu",
publisher = "Springer",
_month = nov,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#watershed",
pdf = "http://www.hutter1.net/publ/watershed.pdf",
project = "http://www.hutter1.net/official/projects.htm#icar",
issn = "0302-9743",
isbn = "978-3-642-37330-5",
doi = "10.1007/978-3-642-37331-2_58",
keywords = "seeded image segmentation; viscous force; local average path; noise-tolerant.",
abstract = "The watershed transform was proposed as a novel method for image
segmentation over 30 years ago. Today it is still used as an
elementary step in many powerful segmentation procedures. The
watershed transform constitutes one of the main concepts of
mathematical morphology as an important region-based image
segmentation approach. However, the original watershed transform is
highly sensitive to noise and is incapable of detecting objects
with broken edges. Consequently its adoption in domains where
imaging is subject to high noise is limited. By incorporating a
high-order energy term into the original watershed transform, we
proposed the viscous force watershed transform, which is more
immune to noise and able to detect objects with broken edges.",
support = "ControlExpert GmbH",
for = "080106(100%)",
seo = "970108(80%),890205(20%)",
znote = "Acceptance rate: 226/869 = 26\%",
}
@InProceedings{Hutter:12pacmdp,
author = "Tor Lattimore and Marcus Hutter",
title = "{PAC} bounds for discounted {MDP}s",
booktitle = "Proc. 23rd International Conf. on Algorithmic Learning Theory ({ALT'12})",
address = "Lyon, France",
series = "LNAI",
volume = "7568",
_editor = "N.H. Bshouty and G. Stoltz and N. Vayatis and T. Zeugmann",
publisher = "Springer",
pages = "320--334",
_month = oct,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#pacmdp",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT12/",
url = "http://arxiv.org/abs/1202.3890",
pdf = "http://www.hutter1.net/publ/pacmdp.pdf",
slides = "http://www.hutter1.net/publ/spacmdp.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
doi = "10.1007/978-3-642-34106-9_26",
issn = "0302-9743",
isbn = "3-642-34105-2",
keywords = "Reinforcement learning; sample-complexity;
exploration exploitation; PAC-MDP;
Markov decision processes.",
abstract = "We study upper and lower bounds on the sample-complexity of
learning near-optimal behaviour in finite-state discounted
Markov Decision Processes (MDPs). We prove a new bound for a
modified version of Upper Confidence Reinforcement Learning
(UCRL) with only cubic dependence on the horizon. The bound is
unimprovable in all parameters except the size of the
state/action space, where it depends linearly on the number of
non-zero transition probabilities. The lower bound strengthens
previous work by being both more general (it applies to all
policies) and tighter. The upper and lower bounds match up to
logarithmic factors provided the transition matrix is not too
dense.",
support = "ARC grant DP0988049",
for = "010404(30%),010405(30%),080198(40%)",
seo = "970108(100%)",
znote = "Acceptance rate: 23/47 = 49\%",
}
% Book chapter (Atlantis Press): informal decade-overview of Universal AI (AIXI).
@InCollection{Hutter:12uaigentle,
author = "Marcus Hutter",
title = "One Decade of Universal Artificial Intelligence",
booktitle = "Theoretical Foundations of Artificial General Intelligence",
pages = "67--88",
_editor = "Pei Wang and Ben Goertzel",
publisher = "Atlantis Press",
_month = sep,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#uaigentle",
url = "http://arxiv.org/abs/1202.6153",
pdf = "http://www.hutter1.net/publ/uaigentle.pdf",
latex = "http://www.hutter1.net/publ/uaigentle.zip",
slides = "http://www.hutter1.net/publ/suaigentle.pdf",
slides2 = "http://www.hutter1.net/publ/suai4lay.pdf",
video = "http://vimeo.com/7321732",
video2 = "http://www.youtube.com/watch?v=I-vx5zbOOXI",
http = "http://2012.singularitysummit.com.au/2012/08/universal-artificial-intelligence/",
project = "http://www.hutter1.net/official/projects.htm#uai",
interview = "http://www.youtube.com/watch?v=a2tgUXm_txw",
doi = "10.2991/978-94-91216-62-6_5",
isbn = "978-94-91216-61-9(print) 978-94-91216-62-6(online)",
keywords = "artificial intelligence; reinforcement learning;
algorithmic information theory; sequential decision theory;
universal induction; rational agents; foundations.",
abstract = "The first decade of this century has seen the nascency of the
first mathematical theory of general artificial intelligence.
This theory of Universal Artificial Intelligence (UAI) has made
significant contributions to many theoretical, philosophical,
and practical AI questions. In a series of papers culminating
in book (Hutter, 2005), an exciting sound and complete
mathematical model for a super intelligent agent (AIXI) has
been developed and rigorously analyzed. While nowadays most AI
researchers avoid discussing intelligence, the award-winning
PhD thesis (Legg, 2008) provided the philosophical embedding
and investigated the UAI-based universal measure of rational
intelligence, which is formal, objective and
non-anthropocentric. Recently, effective approximations of AIXI
have been derived and experimentally investigated in JAIR paper
(Veness et al. 2011). This practical breakthrough has resulted
in some impressive applications, finally muting earlier
critique that UAI is only a theory. For the first time, without
providing any domain knowledge, the same agent is able to
self-adapt to a diverse range of interactive environments. For
instance, AIXI is able to learn from scratch to play TicTacToe,
Pacman, Kuhn Poker, and other games by trial and error, without
even providing the rules of the games.
These achievements give new hope that the grand goal of
Artificial General Intelligence is not elusive.
This article provides an informal overview of UAI in context.
It attempts to gently introduce a very theoretical, formal, and
mathematical subject, and discusses philosophical and technical
ingredients, traits of intelligence, some social questions, and
the past and future of UAI.",
support = "ARC grant DP0988049",
for = "080401(20%),080101(30%),080199(30%),220399(20%)",
seo = "970108(80%),870122(20%)",
}
% AAAI'12 paper: Context Tree Maximizing RL (CTMRL).
% Fixes: dropped empty volume field (BibTeX warning); LaTeX-escaped "J{\"o}rg"
% (file otherwise uses classic-BibTeX escapes, not raw Unicode); repaired
% PDF-paste hyphenation artifacts in the abstract ("non- Markovian",
% "MCAIXI- CTW" -> "MC-AIXI-CTW").
@InProceedings{Hutter:12ctmrl,
author = "Phuong Nguyen and Peter Sunehag and Marcus Hutter",
title = "Context Tree Maximizing Reinforcement Learning",
booktitle = "Proc. 26th {AAAI} Conference on Artificial Intelligence ({AAAI'12})",
pages = "1075--1082",
_editor = "J{\"o}rg Hoffmann and Bart Selman",
publisher = "AAAI Press",
address = "Toronto, Canada",
_month = jul,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#ctmrl",
http = "http://www.aaai.org/ocs/index.php/AAAI/AAAI12/paper/view/5079",
pdf = "http://www.hutter1.net/publ/ctmrl.pdf",
latex = "http://www.hutter1.net/publ/ctmrl.zip",
poster = "http://www.hutter1.net/publ/sctmrl.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
code = "http://www.hutter1.net/publ/ctmrlcode.zip",
isbn = "978-1-57735-568-7",
keywords = "Context Tree Maximization; Markov Decision Process;
Feature Reinforcement Learning.",
abstract = "Recent developments in reinforcement learning for
non-Markovian problems witness a surge in history-based methods,
among which we are particularly interested in two frameworks,
PhiMDP and MC-AIXI-CTW. PhiMDP attempts to reduce
the general RL problem, where the environment's states
and dynamics are both unknown, to an MDP, while
MC-AIXI-CTW incrementally learns a mixture of context trees
as its environment model. The main idea of PhiMDP is to connect
generic reinforcement learning with classical reinforcement
learning. The first implementation of PhiMDP relies on a
stochastic search procedure for finding a tree that minimizes a
certain cost function. This does not guarantee finding the minimizing
tree, or even a good one, given limited search time.
As a consequence it appears that the approach has difficulties
with large domains. MC-AIXI-CTW is attractive in that it can
incrementally and analytically compute the internal model
through interactions with the environment. Unfortunately, it
is computationally demanding due to requiring heavy planning
simulations at every single time step. We devise a novel
approach called CTMRL, which analytically and efficiently
finds the cost-minimizing tree. Instead of the context-tree
weighting method that MC-AIXI-CTW is based on, we use
the closely related context-tree maximizing algorithm that selects
just one single tree. This approach falls under the PhiMDP
framework, which allows the replacement of the costly planning
component of MC-AIXI-CTW with simple Q-Learning.
Our empirical investigation shows that CTMRL finds policies
of quality as good as MC-AIXI-CTW's on six domains
including a challenging Pacman domain, but in an order of
magnitude less time.",
support = "ARC grant DP120100950",
for = "080401(20%),010405(20%),080101(60%)",
seo = "970108(100%)",
znote = "Acceptance rate: 294/1129 = 26\%",
}
% arXiv tech report: Sparse Sequential Dirichlet Coding (data compression).
@TechReport{Hutter:12ssdc,
author = "Joel Veness and Marcus Hutter",
title = "Sparse Sequential {D}irichlet Coding",
institution = "UoA and ANU",
number = "arXiv:1206.3618",
_month = jun,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#ssdc",
url = "http://arxiv.org/abs/1206.3618",
pdf = "http://www.hutter1.net/publ/ssdc.pdf",
latex = "http://www.hutter1.net/publ/ssdc.tex",
slides = "http://www.hutter1.net/publ/ssad.pdf",
project = "http://www.hutter1.net/official/projects.htm#compress",
keywords = "Dirichlet prior; KT estimator; sparse coding;
small/large alphabet; data compression.",
abstract = "This short paper describes a simple coding technique, Sparse
Sequential Dirichlet Coding, for multi-alphabet memoryless
sources. It is appropriate in situations where only a small,
unknown subset of the possible alphabet symbols can be expected
to occur in any particular data sequence. We provide a
competitive analysis which shows that the performance of Sparse
Sequential Dirichlet Coding will be close to that of a
Sequential Dirichlet Coder that knows in advance the exact
subset of occurring alphabet symbols. Empirically we show that
our technique can perform similarly to the more computationally
demanding Sequential Sub-Alphabet Estimator, while using less
computational resources.",
for = "080401(100%)",
seo = "970108(80%),890205(20%)",
}
% DCC'12 paper: Context Tree Switching (generalizes Context Tree Weighting).
@InProceedings{Hutter:12ctswitch,
author = "Joel Veness and Kee Siong Ng and Marcus Hutter and Michael Bowling",
title = "Context Tree Switching",
booktitle = "Proc. Data Compression Conference ({DCC'12})",
pages = "327--336",
_editor = "J. A. Storer and M. W. Marcellin",
publisher = "IEEE Computer Society",
address = "Snowbird, Utah, USA",
_month = apr,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#ctswitch",
url = "http://arxiv.org/abs/1111.3182",
pdf = "http://www.hutter1.net/publ/ctswitch.pdf",
latex = "http://www.hutter1.net/publ/ctswitch.tex",
slides = "http://www.hutter1.net/publ/sctswitch.pdf",
project = "http://www.hutter1.net/official/projects.htm#compress",
code = "http://jveness.info/software/cts-v1.zip",
doi = "10.1109/DCC.2012.39",
issn = "1068-0314",
isbn = "978-1-4673-0715-4",
keywords = "switching data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
abstract = "This paper describes the Context Tree Switching technique, a
modification of Context Tree Weighting for the prediction of
binary, stationary, n-Markov sources. By modifying Context
Tree Weighting's recursive weighting scheme, it is possible to
mix over a strictly larger class of models without increasing
the asymptotic time or space complexity of the original
algorithm. We prove that this generalization preserves the
desirable theoretical properties of Context Tree Weighting on
stationary n-Markov sources, and show empirically that this new
technique leads to consistent improvements over Context Tree
Weighting as measured on the Calgary Corpus.",
support = "ARC grant DP0988049",
for = "080401(100%)",
seo = "970108(80%),890205(20%)",
}
% DCC'12 paper: Adaptive Context Tree Weighting (ACTW) for non-stationary sources.
@InProceedings{Hutter:12adapctw,
author = "Alexander O'Neill and Marcus Hutter and Wen Shao and Peter Sunehag",
title = "Adaptive Context Tree Weighting",
booktitle = "Proc. Data Compression Conference ({DCC'12})",
pages = "317--326",
_editor = "J. A. Storer and M. W. Marcellin",
publisher = "IEEE Computer Society",
address = "Snowbird, Utah, USA",
_month = apr,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#adapctw",
url = "http://arxiv.org/abs/1201.2056",
pdf = "http://www.hutter1.net/publ/adapctw.pdf",
latex = "http://www.hutter1.net/publ/adapctw.tex",
slides = "http://www.hutter1.net/publ/sadapctw.pdf",
project = "http://www.hutter1.net/official/projects.htm#compress",
code = "http://www.hutter1.net/publ/actwcode.zip",
doi = "10.1109/DCC.2012.38",
issn = "1068-0314",
isbn = "978-0-7695-4656-8",
keywords = "adaptive data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
abstract = "We describe an adaptive context tree weighting (ACTW)
algorithm, as an extension to the standard context tree
weighting (CTW) algorithm. Unlike the standard CTW algorithm,
which weights all observations equally regardless of the depth,
ACTW gives increasing weight to more recent observations,
aiming to improve performance in cases where the input sequence
is from a non-stationary distribution. Data compression results
show ACTW variants improving over CTW on merged files from
standard compression benchmark tests while never being
significantly worse on any individual file.",
support = "ARC grant DP0988049",
for = "080401(100%)",
seo = "970108(80%),890205(20%)",
}
% JCS article on the technological singularity.
% Fixes: the entry had two "http" fields (duplicate field names are dropped with
% a warning by BibTeX) -- second renamed to "http2", matching the video/video2
% naming pattern used elsewhere in this file; normalized spacing of slidesvideo.
@Article{Hutter:12singularity,
author = "Marcus Hutter",
title = "Can Intelligence Explode?",
journal = "Journal of Consciousness Studies",
volume = "19",
number = "1-2",
pages = "143--166",
publisher = "Imprint Academic",
_month = feb,
year = "2012",
bibtex = "http://www.hutter1.net/official/bib.htm#singularity",
http = "http://www.ingentaconnect.com/content/imp/jcs/2012/00000019/F0020001/art00010",
url = "http://arxiv.org/abs/1202.6177",
pdf = "http://www.hutter1.net/publ/singularity.pdf",
latex = "http://www.hutter1.net/publ/singularity.tex",
slidespdf = "http://www.hutter1.net/publ/ssingularity.pdf",
slidesppt = "http://www.hutter1.net/publ/ssingularity.pptx",
slidesaudio = "http://www.hutter1.net/publ/sasingularity.pptx",
slidesvideo = "http://www.hutter1.net/publ/svsingularity.avi",
livevideo = "http://www.youtube.com/watch?v=focaMjQbmkI",
http2 = "http://2012.singularitysummit.com.au/2012/08/can-intelligence-explode/",
project = "http://www.hutter1.net/official/projects.htm#uai",
interview = "http://www.youtube.com/watch?v=omG990F_ETY",
issn = "1355-8250",
keywords = "singularity; acceleration; intelligence; evolution;
rationality; goal; life; value; virtual; computation; AIXI.",
abstract = "The technological singularity refers to a hypothetical scenario
in which technological advances virtually explode. The most
popular scenario is the creation of super-intelligent
algorithms that recursively create ever higher intelligences.
It took many decades for these ideas to spread from science
fiction to popular science magazines and finally to attract the
attention of serious philosophers. David Chalmers' (JCS 2010)
article is the first comprehensive philosophical analysis of
the singularity in a respected philosophy journal. The
motivation of my article is to augment Chalmers' and to discuss
some issues not addressed by him, in particular what it could
mean for intelligence to explode. In this course, I will (have
to) provide a more careful treatment of what intelligence
actually is, separate speed from intelligence explosion,
compare what super-intelligent participants and classical human
observers might experience and do, discuss immediate
implications for the diversity and value of life, consider
possible bounds on intelligence, and contemplate intelligences
right at the singularity.",
for = "080101(40%),140104(10%),220312(50%)",
seo = "970122(100%)",
}
%-------------Publications-of-Marcus-Hutter-2011--------------%
% DICTA'11 paper: 3D-model-assisted image segmentation.
% Fix: added the trailing comma after the last field, consistent with every
% other entry in this file. NOTE(review): entry has "for" but no "seo" field,
% unlike sibling ControlExpert entries -- verify against the master list.
@InProceedings{Hutter:11segm3d,
author = "Srimal Jayawardena and Di Yang and Marcus Hutter",
title = "3D Model Assisted Image Segmentation",
booktitle = "Proc. 13th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'11})",
pages = "51--58",
_editor = "Andrew Bradley and Paul Jackway",
publisher = "IEEE Xplore",
address = "Noosa, Australia",
_month = dec,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#segm3d",
url = "http://arxiv.org/abs/1202.1943",
pdf = "http://www.hutter1.net/publ/segm3d.pdf",
latex = "http://www.hutter1.net/publ/segm3d.zip",
slides = "http://www.hutter1.net/publ/ssegm3d.pdf",
project = "http://www.hutter1.net/official/projects.htm#icar",
doi = "10.1109/DICTA.2011.17",
isbn = "978-1-4577-2006-2 or 978-0-7695-4588-2",
keywords = "Image segmentation; 3D-2D Registration; Full 3D Pose; Contour Detection; Fully Automatic.",
abstract = "The problem of segmenting a given image into coherent regions
is important in Computer Vision and many industrial
applications require segmenting a known object into its
components. Examples include identifying individual parts of a
component for process control work in a manufacturing plant and
identifying parts of a car from a photo for automatic damage
detection. Unfortunately most of an object's parts of interest
in such applications share the same pixel characteristics,
having similar colour and texture. This makes segmenting the
object into its components a non-trivial task for conventional
image segmentation algorithms. In this paper, we propose a
``Model Assisted Segmentation'' method to tackle this problem. A
3D model of the object is registered over the given image by
optimising a novel gradient based loss function. This
registration obtains the full 3D pose from an image of the
object. The image can have an arbitrary view of the object and
is not limited to a particular set of views. The segmentation
is subsequently performed using a level-set based method, using
the projected contours of the registered 3D model as
initialisation curves. The method is fully automatic and
requires no user interaction. Also, the system does not require
any prior training. We present our results on photographs of a
real car.",
support = "ControlExpert GmbH",
for = "080104(50%),080106(50%)",
znote = "Acceptance rate: 42/200 = 21\% (oral).",
}
% DICTA'11 paper: illumination-invariant loss for monocular 3D pose estimation.
% Fix: added the trailing comma after the last field, consistent with every
% other entry in this file.
@InProceedings{Hutter:11losspose,
author = "Srimal Jayawardena and Marcus Hutter and Nathan Brewer",
title = "A Novel Illumination-Invariant Loss for Monocular 3D Pose Estimation",
booktitle = "Proc. 13th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'11})",
pages = "37--44",
_editor = "Andrew Bradley and Paul Jackway",
publisher = "IEEE Xplore",
address = "Noosa, Australia",
_month = dec,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#losspose",
url = "http://arxiv.org/abs/1011.1035",
pdf = "http://www.hutter1.net/publ/losspose.pdf",
latex = "http://www.hutter1.net/publ/losspose.zip",
slides = "http://www.hutter1.net/publ/slosspose.pdf",
project = "http://www.hutter1.net/official/projects.htm#icar",
doi = "10.1109/DICTA.2011.15",
isbn = "978-1-4577-2006-2 or 978-0-7695-4588-2",
keywords = "illumination-invariant loss; 2D-3D pose estimation;
pixel-based; featureless; optimisation; 3D model, monocular.",
abstract = "The problem of identifying the 3D pose of a known object from a
given 2D image has important applications in Computer Vision.
Our proposed method of registering a 3D model of a known object
on a given 2D photo of the object has numerous advantages over
existing methods. It does not require prior training, knowledge
of the camera parameters, explicit point correspondences or
matching features between the image and model. Unlike
techniques that estimate a partial 3D pose (as in an overhead
view of traffic or machine parts on a conveyor belt), our
method estimates the complete 3D pose of the object. It works
on a single static image from a given view under varying and
unknown lighting conditions. For this purpose we derive a novel
illumination-invariant distance measure between the 2D photo
and projected 3D model, which is then minimised to find the
best pose parameters. Results for vehicle pose detection in
real photographs are presented.",
support = "ControlExpert GmbH",
for = "080104(50%),080106(50%)",
znote = "Acceptance rate: 42/200 = 21\% (oral).",
}
% Solomonoff memorial conference paper: No Free Lunch vs Occam's razor.
@InProceedings{Hutter:11uivnfl,
author = "Tor Lattimore and Marcus Hutter",
title = "No Free Lunch versus {O}ccam's Razor in Supervised Learning",
booktitle = "Proc. Solomonoff 85th Memorial Conference",
address = "Melbourne, Australia",
series = "LNAI",
volume = "7070",
pages = "223--235",
_editor = "David Dowe",
publisher = "Springer",
_month = nov,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#uivnfl",
url = "http://arxiv.org/abs/1111.3846",
pdf = "http://www.hutter1.net/publ/uivnfl.pdf",
latex = "http://www.hutter1.net/publ/uivnfl.zip",
slides = "http://www.hutter1.net/publ/suivnfl.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1007/978-3-642-44958-1_17",
issn = "0302-9743",
isbn = "978-3-642-44957-4",
keywords = "Supervised Learning; Kolmogorov complexity; Occam's Razor; No Free Lunch.",
abstract = "The No Free Lunch theorems are often used to argue that domain
specific knowledge is required to design successful
algorithms. We use algorithmic information theory to argue the
case for a universal bias allowing an algorithm to succeed in
all interesting problem domains. Additionally, we give a new
algorithm for off-line classification, inspired by Solomonoff
induction, with good performance on all structured problems
under reasonable assumptions. This includes a proof of the
efficacy of the well-known heuristic of randomly selecting
training data in the hope of reducing misclassification rates.",
support = "ARC grant DP0988049",
for = "010404(20%),010405(20%),080198(60%)",
seo = "970108(100%)",
}
% Solomonoff memorial conference paper: principles underlying Solomonoff induction and AIXI.
@InProceedings{Hutter:11aixiaxiom2,
author = "Peter Sunehag and Marcus Hutter",
title = "Principles of {S}olomonoff Induction and {AIXI}",
booktitle = "Proc. Solomonoff 85th Memorial Conference",
address = "Melbourne, Australia",
series = "LNAI",
volume = "7070",
pages = "386--398",
_editor = "David Dowe",
publisher = "Springer",
_month = nov,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#aixiaxiom2",
url = "http://arxiv.org/abs/1111.6117",
pdf = "http://www.hutter1.net/publ/aixiaxiom2.pdf",
latex = "http://www.hutter1.net/publ/aixiaxiom2.tex",
slides = "http://www.hutter1.net/publ/saixiaxiom2.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-44958-1_30",
issn = "0302-9743",
isbn = "978-3-642-44957-4",
keywords = "computability; representation; rationality; Solomonoff induction.",
abstract = "We identify principles underlying Solomonoff Induction. Key
concepts are rationality, computability, indifference and time
consistency. Furthermore, we discuss extensions to the full AI
case to derive AIXI.",
support = "ARC grant DP0988049",
for = "080401(20%),010404(30%),080101(50%)",
seo = "970108(80%),970122(20%)",
}
% Solomonoff memorial conference paper: (non-)equivalence of universal priors.
% Fix: doubled word "the the" in the abstract.
@InProceedings{Hutter:11unipreq,
author = "Ian Wood and Peter Sunehag and Marcus Hutter",
title = "({N}on-)Equivalence of Universal Priors",
booktitle = "Proc. Solomonoff 85th Memorial Conference",
address = "Melbourne, Australia",
series = "LNAI",
volume = "7070",
pages = "417--425",
_editor = "David Dowe",
publisher = "Springer",
_month = nov,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#unipreq",
url = "http://arxiv.org/abs/1111.3854",
pdf = "http://www.hutter1.net/publ/unipreq.pdf",
latex = "http://www.hutter1.net/publ/unipreq.tex",
slides = "http://www.hutter1.net/publ/sunipreq.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1007/978-3-642-44958-1_33",
issn = "0302-9743",
isbn = "978-3-642-44957-4",
keywords = "algorithmic information theory; universal induction; universal prior.",
abstract = "Ray Solomonoff invented the notion of universal induction
featuring an aptly termed ``universal'' prior probability
function over all possible computable environments. The
essential property of this prior was its ability to dominate
all other such priors. Later, Levin introduced another
construction --- a mixture of all possible priors or `universal
mixture'. These priors are well known to be equivalent up to
multiplicative constants. Here, we seek to clarify further the
relationships between these three characterisations of a
universal prior (Solomonoff's, universal mixtures, and
universally dominant priors). We see that the constructions
of Solomonoff and Levin define an identical class of priors,
while the class of universally dominant priors is strictly
larger. We provide some characterisation of the discrepancy.",
support = "ARC grant DP0988049",
for = "010404(40%),010405(30%),080401(30%)",
seo = "970108(100%)",
}
% ALT'11 paper: asymptotically optimal agents.
@InProceedings{Hutter:11asyoptag,
author = "Tor Lattimore and Marcus Hutter",
title = "Asymptotically Optimal Agents",
booktitle = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
address = "Espoo, Finland",
series = "LNAI",
volume = "6925",
_editor = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
publisher = "Springer",
pages = "368--382",
_month = oct,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#asyoptag",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
url = "http://arxiv.org/abs/1107.5537",
pdf = "http://www.hutter1.net/publ/asyoptag.pdf",
latex = "http://www.hutter1.net/publ/asyoptag.tex",
slides = "http://www.hutter1.net/publ/sasyoptag.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-24412-4_29",
issn = "0302-9743",
isbn = "3-642-24411-4",
keywords = "Rational agents; sequential decision theory;
artificial general intelligence; reinforcement learning;
asymptotic optimality; general discounting.",
abstract = "Artificial general intelligence aims to create agents capable
of learning to solve arbitrary interesting problems. We define
two versions of asymptotic optimality and prove that no agent
can satisfy the strong version while in some cases, depending
on discounting, there does exist a non-computable weak
asymptotically optimal agent.",
support = "ARC grant DP0988049",
for = "080101(100%)",
znote = "Acceptance rate: 28/61 = 46\%",
}
% ALT'11 paper: universal prediction of selected bits (normalised Solomonoff induction).
@InProceedings{Hutter:11evenbits,
author = "Tor Lattimore and Marcus Hutter and Vaibhav Gavane",
title = "Universal Prediction of Selected Bits",
booktitle = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
address = "Espoo, Finland",
series = "LNAI",
volume = "6925",
_editor = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
publisher = "Springer",
pages = "262--276",
_month = oct,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#evenbits",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
url = "http://arxiv.org/abs/1107.5531",
pdf = "http://www.hutter1.net/publ/evenbits.pdf",
latex = "http://www.hutter1.net/publ/evenbits.tex",
slides = "http://www.hutter1.net/publ/sevenbits.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1007/978-3-642-24412-4_22",
issn = "0302-9743",
isbn = "3-642-24411-4",
keywords = "Sequence prediction; Solomonoff induction;
online classification; discriminative learning;
algorithmic information theory.",
abstract = "Many learning tasks can be viewed as sequence prediction
problems. For example, online classification can be converted
to sequence prediction with the sequence being pairs of
input/target data and where the goal is to correctly predict
the target data given input data and previous input/target
pairs. Solomonoff induction is known to solve the general
sequence prediction problem, but only if the entire sequence is
sampled from a computable distribution. In the case of
classification and discriminative learning though, only the
targets need be structured (given the inputs). We show that the
normalised version of Solomonoff induction can still be used in
this case, and more generally that it can detect any recursive
sub-pattern (regularity) within an otherwise completely
unstructured sequence. It is also shown that the unnormalised
version can fail to predict very simple recursive sub-patterns.",
support = "ARC grant DP0988049",
for = "080401(40%),010404(30%),010405(30%)",
znote = "Acceptance rate: 28/61 = 46\%",
}
% ALT'11 paper: time-consistent discounting for immortal agents.
@InProceedings{Hutter:11tcdisc,
author = "Tor Lattimore and Marcus Hutter",
title = "Time Consistent Discounting",
booktitle = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
address = "Espoo, Finland",
series = "LNAI",
volume = "6925",
_editor = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
publisher = "Springer",
pages = "383--397",
_month = oct,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#tcdisc",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
url = "http://arxiv.org/abs/1107.5528",
pdf = "http://www.hutter1.net/publ/tcdisc.pdf",
latex = "http://www.hutter1.net/publ/tcdisc.tex",
slides = "http://www.hutter1.net/publ/stcdisc.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
doi = "10.1007/978-3-642-24412-4_30",
issn = "0302-9743",
isbn = "3-642-24411-4",
keywords = "Rational agents; sequential decision theory;
general discounting; time-consistency; game theory.",
abstract = "A possibly immortal agent tries to maximise its summed
discounted rewards over time, where discounting is used to
avoid infinite utilities and encourage the agent to value
current rewards more than future ones. Some commonly used
discount functions lead to time-inconsistent behavior where the
agent changes its plan over time. These inconsistencies can
lead to very poor behavior. We generalise the usual discounted
utility model to one where the discount function changes with
the age of the agent. We then give a simple characterisation of
time-(in)consistent discount functions and show the existence
of a rational policy for an agent that knows its discount
function is time-inconsistent.",
for = "010405(20%),080101(40%),140104(20%),170202(20%)",
seo = "970108(40%),970114(30%),970117(30%)",
znote = "Acceptance rate: 28/61 = 46\%",
}
% ALT'11 paper: axiomatic theory of rational reinforcement learning.
@InProceedings{Hutter:11aixiaxiom,
author = "Peter Sunehag and Marcus Hutter",
title = "Axioms for Rational Reinforcement Learning",
booktitle = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
address = "Espoo, Finland",
series = "LNAI",
volume = "6925",
_editor = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
publisher = "Springer",
pages = "338--352",
_month = oct,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#aixiaxiom",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
url = "http://arxiv.org/abs/1107.5520",
pdf = "http://www.hutter1.net/publ/aixiaxiom.pdf",
latex = "http://www.hutter1.net/publ/aixiaxiom.tex",
slides = "http://www.hutter1.net/publ/saixiaxiom.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1007/978-3-642-24412-4_27",
issn = "0302-9743",
isbn = "3-642-24411-4",
keywords = "Rationality; Probability; Utility; Banach Space; Linear Functional.",
abstract = "We provide a formal, simple and intuitive theory of rational
decision making including sequential decisions that affect the
environment. The theory has a geometric flavor, which makes the
arguments easy to visualize and understand. Our theory is for
complete decision makers, which means that they have a complete set
of preferences. Our main result shows that a complete rational
decision maker implicitly has a probabilistic model of the
environment. We have a countable version of this result that brings
light on the issue of countable vs finite additivity by showing how
it depends on the geometry of the space which we have preferences
over. This is achieved through fruitfully connecting rationality
with the Hahn-Banach Theorem. The theory presented here can be
viewed as a formalization and extension of the betting odds
approach to probability of Ramsey (1931) and De Finetti (1937).",
support = "ARC grant DP0988049",
for = "080401(20%),010404(30%),080101(50%)",
znote = "Acceptance rate: 28/61 = 46\%",
}
% Edited volume: EWRL'11 workshop proceedings (Springer LNAI 7188).
% NOTE(review): year is 2011 (workshop date) while the LNAI volume appeared
% later -- confirm which year the house style intends for proceedings entries.
@Proceedings{Hutter:11ewrlproc,
editor = "Scott Sanner and Marcus Hutter",
title = "European Workshop on Reinforcement Learning",
subtitle = "9th European Workshop ({EWRL'11})",
publisher = "Springer",
address = "Athens, Greece",
series = "LNAI",
volume = "7188",
_month = sep,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#ewrlproc11",
http = "http://www.springer.com/computer/ai/book/978-3-642-29945-2",
pdf = "http://www.hutter1.net/publ/ewrlproc11.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
issn = "0302-9743",
isbn = "978-3-642-29945-2",
doi = "10.1007/978-3-642-29946-9",
keywords = "artificial intelligence; machine learning;
reinforcement learning; Markov decision process;
function approximation; action; reward; observation; policy; agent.",
abstract = "This book constitutes revised and selected papers of the 9th
European Workshop on Reinforcement Learning, EWRL 2011, which
took place in Athens, Greece in September 2011. The papers
presented were carefully reviewed and selected from 40
submissions. The papers are organized in topical sections
online reinforcement learning, learning and exploring MDPs,
function approximation methods for reinforcement learning,
macro-actions in reinforcement learning, policy search and
bounds, multi-task and transfer reinforcement learning,
multi-agent reinforcement learning, apprenticeship and inverse
reinforcement learning and real-world reinforcement learning.",
for = "080101(50%),080198(50%)",
}
% EWRL-9 paper: first empirical evaluation of the PhiMDP framework.
% Fix: capitalized the proper noun "Bayesian" in the abstract.
@InProceedings{Hutter:11frlexp,
author = "Phuong Nguyen and Peter Sunehag and Marcus Hutter",
title = "Feature Reinforcement Learning in Practice",
booktitle = "Proc. 9th European Workshop on Reinforcement Learning ({EWRL-9})",
series = "LNAI",
volume = "7188",
pages = "66--77",
publisher = "Springer",
_month = sep,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#frlexp",
url = "http://arxiv.org/abs/1108.3614",
pdf = "http://www.hutter1.net/publ/frlexp.pdf",
slides = "http://www.hutter1.net/publ/sfrlexp.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
issn = "0302-9743",
isbn = "978-3-642-29945-2",
doi = "10.1007/978-3-642-29946-9_10",
keywords = "Reinforcement learning; context Markov trees; Markov decision process;
partial observability; information \& complexity;
Monte Carlo search; Kuhn poker; rational agents.",
abstract = "Following a recent surge in using history-based methods for
resolving perceptual aliasing in reinforcement learning, we
introduce an algorithm based on the feature reinforcement
learning framework called $\Phi$MDP \cite{MH09c}. To create a
practical algorithm we devise a stochastic search procedure for
a class of context trees based on parallel tempering and a
specialized proposal distribution. We provide the first
empirical evaluation for $\Phi$MDP. Our proposed algorithm
achieves superior performance to the classical U-tree algorithm
\cite{AKM96} and the recent active-LZ algorithm \cite{Far10},
and is competitive with MC-AIXI-CTW \cite{VNHUS11} that
maintains a Bayesian mixture over all context trees up to a
chosen depth. We are encouraged by our ability to compete with
this sophisticated method using an algorithm that simply picks
one single model, and uses Q-learning on the corresponding MDP.
Our $\Phi$MDP algorithm is much simpler, yet consumes less time
and memory. These results show promise for our future work on
attacking more complex and larger problems.",
support = "ARC grant DP0988049",
for = "080401(30%),010405(20%),080101(50%)",
}
@Article{Hutter:11uiphil,
author = "Samuel Rathmanner and Marcus Hutter",
title = "A Philosophical Treatise of Universal Induction",
journal = "Entropy",
volume = "13",
number = "6",
pages = "1076--1136",
_month = jun,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#uiphil",
url = "http://arxiv.org/abs/1105.5721",
pdf = "http://www.hutter1.net/publ/uiphil.pdf",
latex = "http://www.hutter1.net/publ/uiphil.zip",
slides = "http://www.hutter1.net/publ/suiphil.pdf",
video1 = "http://www.youtube.com/watch?v=gb4oXRsw3yA",
video2 = "http://www.youtube.com/watch?v=Q_cHUpwpdFo",
video3 = "https://www.youtube.com/watch?v=bn060on1hKs",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.3390/e13061076",
issn = "1099-4300",
keywords = "sequence prediction; inductive inference; Bayes rule;
Solomonoff prior; Kolmogorov complexity; Occam's razor;
philosophical issues; confirmation theory; Black raven paradox.",
abstract = "Understanding inductive reasoning is a problem that
has engaged mankind for thousands of years. This problem is
relevant to a wide range of fields and is integral to the
philosophy of science. It has been tackled by many great minds
ranging from philosophers to scientists to mathematicians, and
more recently computer scientists. In this article we argue the
case for Solomonoff Induction, a formal inductive framework
which combines algorithmic information theory with the Bayesian
framework. Although it achieves excellent theoretical results
and is based on solid philosophical foundations, the requisite
technical knowledge necessary for understanding this framework
has caused it to remain largely unknown and unappreciated in
the wider scientific community. The main contribution of this
article is to convey Solomonoff induction and its related
concepts in a generally accessible form with the aim of
bridging this current technical gap. In the process we examine
the major historical contributions that have led to the
formulation of Solomonoff Induction as well as criticisms of
Solomonoff and induction in general. In particular we examine
how Solomonoff induction addresses many issues that have
plagued other inductive systems, such as the black ravens
paradox and the confirmation problem, and compare this approach
with other recent approaches.",
for = "080401(30%),010404(30%),170203(10%),220304(30%)",
znote = "Special Issue on Kolmogorov Complexity edited by Paul Vitanyi",
}
@InCollection{Hutter:11randai,
author = "Marcus Hutter",
title = "Algorithmic Randomness as Foundation of Inductive Reasoning and Artificial Intelligence",
booktitle = "Randomness through Computation",
subtitle = "Some Answers, More Questions",
chapter = "12",
_editor = "H. Zenil",
publisher = "World Scientific",
pages = "159--169",
_month = feb,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#randai",
url = "http://arxiv.org/abs/1102.2468",
pdf = "http://www.hutter1.net/publ/randai.pdf",
latex = "http://www.hutter1.net/publ/randai.tex",
project = "http://www.hutter1.net/official/projects.htm#ait",
isbn = "981-4327-74-3",
keywords = "algorithmic information theory; individual randomness;
Ockham's razor; inductive reasoning; artificial intelligence.",
abstract = "This article is a brief personal account of the past, present,
and future of algorithmic randomness, emphasizing its role in
inductive inference and artificial intelligence. It is written
for a general audience interested in science and philosophy.
Intuitively, randomness is a lack of order or predictability.
If randomness is the opposite of determinism, then algorithmic
randomness is the opposite of computability. Besides many other
things, these concepts have been used to quantify Ockham's
razor, solve the induction problem, and define intelligence.",
for = "080401(40%),010405(20%),080101(10%),080199(30%)",
}
@InCollection{Hutter:11unilearn,
author = "Marcus Hutter",
title = "Universal Learning Theory",
booktitle = "Encyclopedia of Machine Learning",
pages = "1001--1008",
editor = "C. Sammut and G. Webb",
publisher = "Springer",
_month = feb,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#unilearn",
url = "http://arxiv.org/abs/1102.2467",
pdf = "http://www.hutter1.net/publ/unilearn.pdf",
latex = "http://www.hutter1.net/publ/unilearn.tex",
slides = "http://www.hutter1.net/ai/susp.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1007/978-0-387-30164-8",
isbn = "978-0-387-30768-8",
keywords = "Algorithmic probability; Ray Solomonoff; induction;
prediction; decision; action; Turing machine;
Kolmogorov complexity; universal prior; Bayes' rule.",
abstract = "This encyclopedic article gives a mini-introduction into the
theory of universal learning, founded by Ray Solomonoff in the
1960s and significantly developed and extended in the last
decade. It explains the spirit of universal learning, but
necessarily glosses over technical subtleties.",
support = "ARC grant DP0988049",
for = "080401(30%),010405(30%),080198(40%)",
}
@Article{Hutter:11aixictwx,
author = "Joel Veness and Kee Siong Ng and Marcus Hutter and William Uther and David Silver",
title = "A {M}onte-{C}arlo {AIXI} Approximation",
journal = "Journal of Artificial Intelligence Research",
volume = "40",
pages = "95--142",
_publisher = "AAAI Press",
_month = jan,
year = "2011",
bibtex = "http://www.hutter1.net/official/bib.htm#aixictwx",
url = "http://arxiv.org/abs/0909.0801",
pdf = "http://www.hutter1.net/publ/aixictwx.pdf",
latex = "http://www.hutter1.net/publ/aixictwx.zip",
slides = "http://www.hutter1.net/publ/saixictwx.pdf",
award = "http://www.jair.org/bestpaper.html",
project = "http://www.hutter1.net/official/projects.htm#uai",
code = "http://www.hutter1.net/publ/aixictwxcode.zip",
doi = "10.1613/jair.3125",
issn = "1076-9757",
keywords = "Reinforcement Learning (RL);
Context Tree Weighting (CTW);
Monte Carlo Tree Search (MCTS);
Upper Confidence bounds applied to Trees (UCT);
Partially Observable Markov Decision Process (POMDP);
Prediction Suffix Trees (PST).",
abstract = "This paper introduces a principled approach for the design of a
scalable general reinforcement learning agent. Our approach is
based on a direct approximation of AIXI, a Bayesian optimality
notion for general reinforcement learning agents. Previously,
it has been unclear whether the theory of AIXI could motivate
the design of practical algorithms. We answer this hitherto
open question in the affirmative, by providing the first
computationally feasible approximation to the AIXI agent. To
develop our approximation, we introduce a new Monte-Carlo Tree
Search algorithm along with an agent-specific extension to the
Context Tree Weighting algorithm. Empirically, we present a set
of encouraging results on a variety of stochastic and partially
observable domains. We conclude by proposing a number of
directions for future research.",
support = "ARC grant DP0988049",
for = "080401(20%),010404(20%),080101(60%)",
note = "Honorable Mention for the 2014 IJCAI-JAIR Best Paper Prize.",
}
%-------------Publications-of-Marcus-Hutter-2010--------------%
@InProceedings{Hutter:10ctoe,
author = "Marcus Hutter",
title = "Observer Localization in Multiverse Theories",
booktitle = "Proceedings of the Conference in Honour of Murray Gell-Mann's 80th Birthday",
subtitle = "Quantum Mechanics, Elementary Particles, Quantum Cosmology and Complexity",
pages = "638--645",
_editor = "H. Fritzsch and K. K. Phua",
publisher = "World Scientific",
_month = nov,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#ctoe",
pdf = "http://www.hutter1.net/publ/ctoe.pdf",
latex = "http://www.hutter1.net/publ/ctoe.tex",
slides = "http://www.hutter1.net/publ/sctoe.pdf",
video = "http://pirsa.org/displayFlash.php?id=18040117",
project = "http://www.hutter1.net/official/projects.htm#physics",
doi = "10.1142/9789814335614_0069",
isbn = "9814335606",
keywords = "world models; observer localization; predictive power;
Ockham's razor; universal theories; computability.",
abstract = "The progression of theories suggested for our world, from ego- to
geo- to helio-centric models to universe and multiverse theories and
beyond, shows one tendency: The size of the described worlds
increases, with humans being expelled from their center to ever more
remote and random locations. If pushed too far, a potential theory
of everything (TOE) is actually more a theory of nothing (TON).
Indeed such theories have already been developed. I show that
including observer localization into such theories is necessary and
sufficient to avoid this problem. I develop a quantitative recipe to
identify TOEs and distinguish them from TONs and theories
in-between. This precisely shows what the problem is with some
recently suggested universal TOEs.",
for = "080401(70%),020103(30%)",
}
@TechReport{Hutter:10lorpc,
author = "Minh-Ngoc Tran and Marcus Hutter",
title = "Model Selection by Loss Rank for Classification and Unsupervised Learning",
institution = "NUS and ANU",
address = "Singapore and Australia",
number = "arXiv:1011.1379",
pages = "1--20",
_month = nov,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#lorpc",
url = "http://arxiv.org/abs/1011.1379",
pdf = "http://www.hutter1.net/ai/lorpc.pdf",
latex = "http://www.hutter1.net/ai/lorpc.zip",
slides = "http://www.hutter1.net/ai/slorp.pdf",
project = "http://www.hutter1.net/official/projects.htm#mdl",
keywords = "Classification; graphical models; loss rank principle; model selection.",
abstract = "Hutter (2007) recently introduced the loss rank principle
(LoRP) as a general-purpose principle for model selection. The
LoRP enjoys many attractive properties and deserves further
investigations. The LoRP has been well-studied for regression
framework in Hutter and Tran (2010). In this paper, we study
the LoRP for classification framework, and develop it further
for model selection problems in unsupervised learning where the
main interest is to describe the associations between input
measurements, like cluster analysis or graphical modelling.
Theoretical properties and simulation studies are presented.",
for = "080401(20%),010405(50%),080198(30%)",
seo = "970101(70%),970108(30%)",
}
@Proceedings{Hutter:10altproc,
editor = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
title = "Algorithmic Learning Theory",
subtitle = "21st International Conference ({ALT'10})",
publisher = "Springer",
address = "Canberra, Australia",
series = "LNAI",
volume = "6331",
_month = oct,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#altproc10",
http = "http://www.springer.com/computer/ai/book/978-3-642-16107-0",
pdf = "http://www.hutter1.net/publ/altproc10.pdf",
project = "http://www.hutter1.net/official/projects.htm#other",
issn = "0302-9743",
isbn = "978-3-642-16107-0",
doi = "10.1007/978-3-642-16108-7",
keywords = "statistical learning, grammatical inference, graph learning,
PAC learning, query learning, algorithmic teaching, online learning,
inductive inference, reinforcement learning, Kernel methods",
abstract = "The LNAI series reports state-of-the-art results in artificial
intelligence research, development, and education. This volume (LNAI
6331) contains research papers presented at the 21st International
Conference on Algorithmic Learning Theory (ALT 2010), which was held
in Canberra (Australia) during October 6-8, 2010. The main objective
of the conference was to provide an interdisciplinary forum for
high-quality talks with a strong theoretical background and
scientific interchange in areas such as statistical learning,
grammatical inference, graph learning, PAC learning, query learning,
algorithmic teaching, online learning, inductive inference,
reinforcement learning, Kernel methods. The conference was
co-located with the 13th International Conference on Discovery
Science (DS 2010). The volume includes 26 technical contributions
that were selected from 44 submissions, and five invited talks
presented to the audience of ALT and DS. Longer versions of the DS
invited papers are available in the proceedings of DS 2010.",
for = "080401(20%),010405(20%),080199(60%)",
znote = "Acceptance rate: 26/44 = 59\%",
}
@InProceedings{Hutter:10altintro,
author = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
title = "Algorithmic Learning Theory 2010: Editors' Introduction",
booktitle = "Proc. 21st International Conf. on Algorithmic Learning Theory ({ALT'10})",
address = "Canberra, Australia",
series = "LNAI",
volume = "6331",
publisher = "Springer",
pages = "1--10",
_month = oct,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#altintro10",
pdf = "http://www.hutter1.net/publ/altintro10.pdf",
project = "http://www.hutter1.net/official/projects.htm#other",
issn = "0302-9743",
isbn = "978-3-642-16107-0",
doi = "10.1007/978-3-642-16108-7_1",
keywords = "algorithmic learning theory, query models, online
learning, inductive inference, boosting, kernel methods, complexity
and learning, reinforcement learning, unsupervised learning,
grammatical inference, algorithmic forecasting.",
abstract = "Learning theory is an active research area that incorporates ideas,
problems, and techniques from a wide range of disciplines including
statistics, artificial intelligence, information theory, pattern
recognition, and theoretical computer science. The research reported
at the 21st International Conference on Algorithmic Learning Theory
(ALT 2010) ranges over areas such as query models, online learning,
inductive inference, boosting, kernel methods, complexity and
learning, reinforcement learning, unsupervised learning, grammatical
inference, and algorithmic forecasting. In this introduction we give
an overview of the five invited talks and the regular contributions
of ALT 2010.",
for = "080401(20%),010405(20%),080199(60%)",
}
@InProceedings{Hutter:10phimp,
author = "Peter Sunehag and Marcus Hutter",
title = "Consistency of Feature {M}arkov Processes",
booktitle = "Proc. 21st International Conf. on Algorithmic Learning Theory ({ALT'10})",
address = "Canberra, Australia",
series = "LNAI",
volume = "6331",
publisher = "Springer",
pages = "360--374",
_month = oct,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#phimp",
url = "http://arxiv.org/abs/1007.2075",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT10/alt10.jhtml",
pdf = "http://www.hutter1.net/publ/phimp.pdf",
latex = "http://www.hutter1.net/publ/phimp.tex",
slides = "http://www.hutter1.net/publ/sphimp.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
issn = "0302-9743",
isbn = "978-3-642-16107-0",
doi = "10.1007/978-3-642-16108-7_29",
keywords = "Markov Process (MP); Hidden Markov Model (HMM);
Finite State Machine (FSM);
Probabilistic Deterministic Finite State Automata (PDFA);
Penalized Maximum Likelihood (PML);
ergodicity; asymptotic consistency; suffix trees; model selection;
learning; reduction; side information; reinforcement learning.",
abstract = "We are studying long term sequence prediction (forecasting). We
approach this by investigating criteria for choosing a compact
useful state representation. The state is supposed to summarize
useful information from the history. We want a method that is
asymptotically consistent in the sense it will provably eventually
only choose between alternatives that satisfy an optimality property
related to the used criterion. We extend our work to the case where
there is side information that one can take advantage of and,
furthermore, we briefly discuss the active setting where an agent
takes actions to achieve desirable outcomes.",
support = "ARC grant DP0988049",
for = "080401(30%),010405(30%),080101(20%),080198(20%)",
znote = "Acceptance rate: 26/44 = 59\%",
}
@Article{Hutter:10ctoex,
author = "Marcus Hutter",
title = "A Complete Theory of Everything (will be subjective)",
journal = "Algorithms",
volume = "3",
number = "4",
pages = "329--350",
_month = sep,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#ctoex",
url = "http://arxiv.org/abs/0912.5434",
pdf = "http://www.hutter1.net/publ/ctoex.pdf",
latex = "http://www.hutter1.net/publ/ctoex.tex",
slides = "http://www.hutter1.net/publ/sctoe.pdf",
video = "http://pirsa.org/displayFlash.php?id=18040117",
art = "http://www.hutter1.net/publ/ctoel.jpg",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.3390/a3040329",
issn = "1999-4893",
keywords = "world models; observer localization; predictive power;
Ockham's razor; universal theories; inductive reasoning;
simplicity and complexity; universal self-sampling;
no-free-lunch; computability.",
abstract = "Increasingly encompassing models have been suggested for our world.
Theories range from generally accepted to increasingly speculative
to apparently bogus. The progression of theories from ego- to geo-
to helio-centric models to universe and multiverse theories and
beyond was accompanied by a dramatic increase in the sizes of the
postulated worlds, with humans being expelled from their center to
ever more remote and random locations. Rather than leading to a true
theory of everything, this trend faces a turning point after which
the predictive power of such theories decreases (actually to zero).
Incorporating the location and other capacities of the observer into
such theories avoids this problem and allows to distinguish
meaningful from predictively meaningless theories. This also leads
to a truly complete theory of everything consisting of a
(conventional objective) theory of everything plus a (novel
subjective) observer process. The observer localization is neither
based on the controversial anthropic principle, nor has it anything
to do with the quantum-mechanical observation process. The suggested
principle is extended to more practical (partial, approximate,
probabilistic, parametric) world models (rather than theories of
everything). Finally, I provide a justification of Ockham's razor,
and criticize the anthropic principle, the doomsday argument, the no
free lunch theorem, and the falsifiability dogma.",
for = "080401(70%),020103(30%)",
}
@TechReport{Hutter:10pdpx,
author = "Wray Buntine and Marcus Hutter",
title = "A {B}ayesian Review of the {P}oisson-{D}irichlet Process",
institution = "NICTA and ANU",
address = "Australia",
number = "arXiv:1007.0296",
_month = jul,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#pdpx",
url = "http://arxiv.org/abs/1007.0296",
pdf = "http://www.hutter1.net/publ/pdpx.pdf",
latex = "http://www.hutter1.net/publ/pdpx.zip",
slides = "http://www.hutter1.net/publ/spdp.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
keywords = "Pitman-Yor process; Dirichlet;
two-parameter Poisson-Dirichlet process;
Chinese Restaurant Process; Consistency;
(non)atomic distributions;
Bayesian interpretation.",
abstract = "The two-parameter Poisson-Dirichlet process, also known as the
Pitman-Yor Process and related to the Chinese Restaurant Process, is
a generalisation of the Dirichlet Process, and is increasingly
being used for probabilistic modelling in discrete areas such as
language and images. This article reviews the theory of the
Poisson-Dirichlet process in terms of its consistency for
estimation, the convergence rates and the posteriors of data. This
theory has been well developed for continuous distributions (more
generally referred to as non-atomic distributions). This article
then presents a Bayesian interpretation of the Poisson-Dirichlet
process: it is a mixture using an improper and infinite dimensional
Dirichlet distribution. This interpretation requires technicalities
of priors, posteriors and Hilbert spaces, but conceptually, this
means we can understand the process as just another Dirichlet and
thus all its sampling properties fit naturally. Finally, this
article also presents results for the discrete case which is the
case seeing widespread use now in computer science, but which has
received less attention in the literature.",
for = "080404(50%),080405(50%)",
}
@InProceedings{Hutter:10aixictw,
author = "Joel Veness and Kee Siong Ng and Marcus Hutter and David Silver",
title = "Reinforcement Learning via {AIXI} Approximation",
booktitle = "Proc. 24th AAAI Conference on Artificial Intelligence",
pages = "605--611",
_editor = "Maria Fox and David Poole",
publisher = "AAAI Press",
address = "Atlanta, USA",
_month = jul,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#aixictw",
url = "http://arxiv.org/abs/1007.2049",
pdf = "http://www.hutter1.net/publ/aixictw.pdf",
latex = "http://www.hutter1.net/publ/aixictw.zip",
slides = "http://www.hutter1.net/publ/saixictw.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
code = "http://www.jveness.info/software/mc-aixi-src-1.0.zip",
keywords = "Reinforcement Learning (RL);
Context Tree Weighting (CTW);
Monte Carlo Tree Search (MCTS);
Upper Confidence bounds applied to Trees (UCT);
Partially Observable Markov Decision Process (POMDP);
Prediction Suffix Trees (PST).",
abstract = "This paper introduces a principled approach for the design of a
scalable general reinforcement learning agent. This approach is
based on a direct approximation of AIXI, a Bayesian optimality
notion for general reinforcement learning agents. Previously, it has
been unclear whether the theory of AIXI could motivate the design of
practical algorithms. We answer this hitherto open question in the
affirmative, by providing the first computationally feasible
approximation to the AIXI agent. To develop our approximation, we
introduce a Monte Carlo Tree Search algorithm along with an
agent-specific extension of the Context Tree Weighting algorithm.
Empirically, we present a set of encouraging results on a number of
stochastic, unknown, and partially observable domains.",
support = "ARC grant DP0988049",
for = "080401(20%),010404(20%),080101(60%)",
znote = "Acceptance rate: 264/982 = 27\%",
}
@Article{Hutter:10cnlohx,
author = "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
title = "An Integrated {B}ayesian Analysis of {LOH} and Copy Number Data",
journal = "BMC Bioinformatics",
volume = "11",
number = "321",
pages = "1--18",
_month = jun,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#cnlohx",
http = "http://www.biomedcentral.com/1471-2105/11/321",
supplement = "http://www.biomedcentral.com/imedia/1222342299388240/supp2.pdf",
pdf = "http://www.hutter1.net/publ/cnlohx.pdf",
slides = "http://www.hutter1.net/publ/scnloh.pdf",
poster = "http://www.hutter1.net/publ/pcnloh.pdf",
project = "http://www.hutter1.net/official/projects.htm#big",
code = "http://www.biomedcentral.com/imedia/1280629245356661/supp1.zip",
doi = "10.1186/1471-2105-11-321",
issn = "1471-2105",
keywords = "Bayesian regression; piecewise constant function;
change point problem; DNA copy number estimation; LOH estimation",
abstract = "Background: Cancer and other disorders are due to genomic lesions.
SNP-microarrays are able to measure simultaneously both genotype and
copy number (CN) at several Single Nucleotide Polymorphisms (SNPs)
along the genome. CN is defined as the number of DNA copies, and the
normal is two, since we have two copies of each chromosome. The
genotype of a SNP is the status given by the nucleotides (alleles)
which are present on the two copies of DNA. It is defined homozygous
or heterozygous if the two alleles are the same or if they differ,
respectively. Loss of heterozygosity (LOH) is the loss of the
heterozygous status due to genomic events. Combining CN and LOH
data, it is possible to better identify different types of genomic
aberrations. For example, a long sequence of homozygous SNPs might
be caused by either the physical loss of one copy or a uniparental
disomy event (UPD), i.e. each SNP has two identical nucleotides both
derived from only one parent. In this situation, the knowledge of
the CN can help in distinguishing between these two events.
Results: To better identify genomic aberrations, we propose a method
(called gBPCR) which infers the type of aberration occurred, taking
into account all the possible influence in the microarray detection
of the homozygosity status of the SNPs, resulting from an altered CN
level. Namely, we model the distributions of the detected genotype,
given a specific genomic alteration and we estimate the parameters
involved on public reference datasets. The estimation is performed
similarly to the modified Bayesian Piecewise Constant Regression,
but with improved estimators for the detection of the breakpoints.
Using artificial and real data, we evaluate the quality of the
estimation of gBPCR and we also show that it outperforms other
well-known methods for LOH estimation.
Conclusions: We propose a method (gBPCR) for the estimation of both
LOH and CN aberrations, improving their estimation by integrating
both types of data and accounting for their relationships. Moreover,
gBPCR performed very well in comparison with other methods for LOH
estimation and the estimated CN lesions on real data have been
validated with another technique.",
support = "Swiss National Science Foundation grants 205321-112430 and 205320-121886/1;
Oncosuisse grants OCS-1939-8-2006 and OCS-02296-08-2008;
Cantone Ticino Ticino in rete grant;
Fondazione per la Ricerca e la Cura sui Linfomi (Lugano, Switzerland)",
alt = "Also talk at 10th ISBA and IWPACBB'09",
for = "010405(50%),060405(50%)",
}
@Book{Hutter:10agiproc,
editor = "Eric Baum and Marcus Hutter and Emanuel Kitzelmann",
title = "Artificial General Intelligence",
subtitle = "3rd Conference ({AGI'10}) in Memoriam Ray Solomonoff",
publisher = "Atlantis Press",
address = "Lugano, Switzerland",
_month = mar,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#agiproc10",
http = "http://www.atlantis-press.com/publications/aisr/AGI-10/",
pdf = "http://www.hutter1.net/ai/agifb10.pdf",
pdffull = "http://www.hutter1.net/ai/agiproc10.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "1951-6851",
isbn = "978-90-78677-36-9",
abstract = "The Conference on Artificial General Intelligence is the only major
conference series devoted wholly and specifically to the creation of
AI systems possessing general intelligence at the human level and
ultimately beyond. Its third installation, AGI-10, was held in Lugano,
Switzerland, March 5-8, 2010, in Memoriam Ray Solomonoff (1926-2009),
pioneer of machine learning, founder of algorithmic probability theory,
and father of the universal theory of inductive inference.
The conference attracted 66 paper submissions of which
29 (i.e., 44\%) were accepted as full papers for presentation at the
conference. Additional 12 papers were included as short position papers.
The program also included a keynote address by the reinforcement learning
pioneer Richard Sutton, two post-conference workshops, and a number of
pre-conference tutorials on various topics related to AGI.",
for = "080101(50%),080199(50%)",
}
@Article{Hutter:10lorpx,
author = "Marcus Hutter and Minh Tran",
title = "Model Selection with the Loss Rank Principle",
volume = "54",
journal = "Computational Statistics and Data Analysis",
publisher = "Elsevier",
pages = "1288--1306",
_month = feb,
year = "2010",
bibtex = "http://www.hutter1.net/official/bib.htm#lorpx",
url = "http://arxiv.org/abs/1003.0516",
pdf = "http://www.hutter1.net/ai/lorpx.pdf",
ps = "http://www.hutter1.net/ai/lorpx.ps",
latex = "http://www.hutter1.net/ai/lorpx.zip",
slides = "http://www.hutter1.net/ai/slorp.pdf",
project = "http://www.hutter1.net/official/projects.htm#mdl",
code = "http://www.hutter1.net/ai/lorpcode.zip",
doi = "10.1016/j.csda.2009.11.015",
issn = "0167-9473",
keywords = "Model selection, loss rank principle,
non-parametric regression, classification,
general loss function, k nearest neighbors.",
abstract = "A key issue in statistics and machine learning is to automatically
select the ``right'' model complexity, e.g., the number of neighbors
to be averaged over in k nearest neighbor (kNN) regression or the
polynomial degree in regression with polynomials. We suggest a novel
principle - the Loss Rank Principle (LoRP) - for model selection in
regression and classification. It is based on the loss rank, which
counts how many other (fictitious) data would be fitted better. LoRP
selects the model that has minimal loss rank. Unlike most penalized
maximum likelihood variants (AIC, BIC, MDL), LoRP depends only on
the regression functions and the loss function. It works without a
stochastic noise model, and is directly applicable to any
non-parametric regressor, like kNN.",
for = "080401(20%),010405(80%)",
}
%-------------Publications-of-Marcus-Hutter-2009--------------%
@InProceedings{Hutter:09mdltvp,
author = "Marcus Hutter",
title = "Discrete {MDL} Predicts in Total Variation",
booktitle = "Advances in Neural Information Processing Systems 22 ({NIPS'09})",
pages = "817--825",
_editor = "Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta",
publisher = "Curran Associates",
address = "Cambridge, MA, USA",
_month = dec,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#mdltvp",
url = "http://arxiv.org/abs/0909.4588",
pdf = "http://www.hutter1.net/ai/mdltvp.pdf",
ps = "http://www.hutter1.net/ai/mdltvp.ps",
latex = "http://www.hutter1.net/ai/mdltvp.tex",
slides = "http://www.hutter1.net/ai/smdltvp.pdf",
project = "http://www.hutter1.net/official/projects.htm#mdl",
isbn = "1615679111",
keywords = "minimum description length; countable model class;
total variation distance; sequence prediction;
discriminative learning; reinforcement learning.",
abstract = "The Minimum Description Length (MDL) principle selects the model
that has the shortest code for data plus model. We show that for a
countable class of models, MDL predictions are close to the true
distribution in a strong sense. The result is completely general. No
independence, ergodicity, stationarity, identifiability, or other
assumption on the model class need to be made. More formally, we
show that for any countable class of models, the distributions
selected by MDL (or MAP) asymptotically predict (merge
with) the true measure in the class in total variation distance.
Implications for non-i.i.d. domains like time-series forecasting,
discriminative learning, and reinforcement learning are discussed.",
for = "080401(30%),010405(50%),080198(20%)",
znote = "Acceptance rate: 263/1105 = 24\%",
}
@InProceedings{Hutter:09wheel,
author = "Marcus Hutter and Nathan Brewer",
title = "Matching 2-D Ellipses to 3-D Circles with Application to Vehicle Pose Estimation",
booktitle = "Proc. 24th Conf. on Image and Vision Computing New Zealand ({IVCNZ'09})",
pages = "153--158",
_editor = "Donald Bailey",
publisher = "IEEE Xplore",
address = "Wellington, New Zealand",
_month = nov,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#wheel",
url = "http://arxiv.org/abs/0912.3589",
pdf = "http://www.hutter1.net/ai/wheel.pdf",
latex = "http://www.hutter1.net/ai/wheel.zip",
slides = "http://www.hutter1.net/ai/swheel.pdf",
project = "http://www.hutter1.net/official/projects.htm#icar",
code = "http://www.hutter1.net/ai/wheelcode.zip",
doi = "10.1109/IVCNZ.2009.5378421",
issn = "2151-2205",
keywords = "computer vision; image recognition/processing; ellipse detection; 3d models;
2d-ellipse to 3d-circle matching; single image pose identification;
wheel detection; 3d vehicle models.",
abstract = "Finding the three-dimensional representation of all or a part of a
scene from a single two dimensional image is a challenging task. In
this paper we propose a method for identifying the pose and location
of objects with circular protrusions in three dimensions from a
single image and a 3d representation or model of the object of
interest. To do this, we present a method for identifying ellipses
and their properties quickly and reliably with a novel technique
that exploits intensity differences between objects and a geometric
technique for matching an ellipse in 2d to a circle in 3d.
We apply these techniques to the specific problem of determining the
pose and location of vehicles, particularly cars, from a single
image. We have achieved excellent pose recovery performance on
artificially generated car images and show promising results on real
vehicle images. We also make use of the ellipse detection method to
identify car wheels from images, with a very high successful match
rate.",
support = "ControlExpert GmbH",
znote = "Acceptance rate: 79/142 = 56\%",
}
@Article{Hutter:09mbpcrcode,
author = "Paola M.V. Rancoita and Marcus Hutter",
title = "mBPCR: A Package for DNA Copy Number Profile Estimation",
journal = "BioConductor -- Open Source Software for BioInformatics",
number = "0.99",
pages = "1--25",
_month = oct,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#mbpcrcode",
url = "http://www.bioconductor.org/packages/devel/bioc/html/mBPCR.html",
pdf = "http://www.hutter1.net/ai/mbpcrcode.pdf",
project = "http://www.hutter1.net/official/projects.htm#big",
code = "http://www.hutter1.net/ai/mbpcrcode.tar.gz",
keywords = "Bayesian regression, exact polynomial algorithm, piecewise constant function,
mBPCR, DNA copy number estimation, micro arrays, genomic aberrations, R package.",
abstract = "The algorithm mBPCR is a tool for estimating the profile of the
log2ratio of copy number data. The procedure is a Bayesian piecewise
constant regression and can be applied, generally, to estimate any
piecewise constant function (like the log2ratio of the copy number
data). The algorithm has been implemented in R and integrated into
bioconductor, an open source software for bioinformatics. This
document describes how to use the mBPCR bioconductor package in
general and on several examples.",
support = "SNF grant 205321-112430",
}
@Article{Hutter:09phimdpx,
author = "Marcus Hutter",
title = "Feature Reinforcement Learning: Part {I}: Unstructured {MDP}s",
journal = "Journal of Artificial General Intelligence",
volume = "1",
pages = "3--24",
_month = oct,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#phimdpx",
url = "http://arxiv.org/abs/0906.1713",
pdf = "http://www.hutter1.net/ai/phimdpx.pdf",
ps = "http://www.hutter1.net/ai/phimdpx.ps",
latex = "http://www.hutter1.net/ai/phimdpx.tex",
slides = "http://www.hutter1.net/ai/sphimdp.pdf",
video = "http://www.vimeo.com/7390883",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "1946-0163",
keywords = "Reinforcement learning; Markov decision process;
partial observability; feature learning; explore-exploit;
information \& complexity; rational agents.",
abstract = "General-purpose, intelligent, learning agents cycle through
sequences of observations, actions, and rewards that are complex,
uncertain, unknown, and non-Markovian. On the other hand,
reinforcement learning is well-developed for small finite state
Markov decision processes (MDPs). Up to now, extracting the right
state representations out of bare observations, that is, reducing
the general agent setup to the MDP framework, is an art that
involves significant effort by designers. The primary goal of this
work is to automate the reduction process and thereby significantly
expand the scope of many existing reinforcement learning algorithms
and the agents that employ them. Before we can think of mechanizing
this search for suitable MDPs, we need a formal objective criterion.
The main contribution of this article is to develop such a
criterion. I also integrate the various parts into one learning
algorithm. Extensions to more realistic dynamic Bayesian networks
are developed in Part II. The role of POMDPs is also considered there.",
}
@Article{Hutter:09phidbnx,
author = "M. Hutter",
title = "Feature Reinforcement Learning: Part {II}: Structured {MDP}s",
journal = "Journal of Artificial General Intelligence",
pages = "71--86",
_month = jun,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#phidbnx",
pdf = "http://www.hutter1.net/publ/phidbnx.pdf",
slides = "http://www.hutter1.net/publ/sphimdp.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi =          "10.2478/jagi-2021-0003",
keywords = "Reinforcement learning; dynamic Bayesian network; structure learning;
feature selection; global vs. local reward; explore-exploit;
information \& complexity; rational agents; partial observability",
abstract = "The Feature Markov Decision Processes (PhiMDP) model developed
in Part I is well-suited for learning agents in general environments.
Nevertheless, unstructured (Phi)MDPs are limited to relatively
simple environments. Structured MDPs like Dynamic Bayesian Networks
(DBNs) are used for large-scale real-world problems.
In this article I extend PhiMDP to PhiDBN.
The primary contribution is to derive a cost criterion that allows
to automatically extract the most relevant features from the
environment, leading to the ``best'' DBN representation.
I discuss all building blocks required for a complete general
learning algorithm, and compare the novel PhiDBN model to the
prevalent POMDP approach.",
}
@Article{Hutter:09aixiopen,
author = "Marcus Hutter",
title = "Open Problems in Universal Induction \& Intelligence",
journal = "Algorithms",
volume =       "2",
number =       "3",
pages = "879--906",
_month = jul,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#aixiopen",
url = "http://arxiv.org/abs/0907.0746",
pdf = "http://www.hutter1.net/ai/aixiopen.pdf",
ps = "http://www.hutter1.net/ai/aixiopen.ps",
latex = "http://www.hutter1.net/ai/aixiopen.tex",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.3390/a2030879",
issn = "1999-4893",
keywords = "Kolmogorov complexity; information theory;
sequential decision theory; reinforcement learning;
artificial intelligence; universal Solomonoff induction;
rational agents.",
abstract = "Specialized intelligent systems can be found everywhere: finger
print, handwriting, speech, and face recognition, spam filtering,
chess and other game programs, robots, et al. This decade the first
presumably complete {\em mathematical} theory of artificial
intelligence based on universal induction-prediction-decision-action
has been proposed. This information-theoretic approach solidifies
the foundations of inductive inference and artificial intelligence.
Getting the foundations right usually marks a significant progress
and maturing of a field. The theory provides a gold standard and
guidance for researchers working on intelligent algorithms. The
roots of universal induction have been laid exactly half-a-century
ago and the roots of universal intelligence exactly one decade ago.
So it is timely to take stock of what has been achieved and what
remains to be done. Since there are already good recent surveys, I
describe the state-of-the-art only in passing and refer the reader
to the literature. This article concentrates on the open problems in
universal induction and its extension to universal intelligence.",
}
@InProceedings{Hutter:09cnloh,
author = "Paola M.V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
title = "Bayesian Joint Estimation of {CN} and {LOH} Aberrations",
booktitle =    "Proc. 3rd International Workshop on Practical Applications of Computational Biology \& Bioinformatics ({IWPACBB'09})",
volume = "5518",
series = "LNCS",
pages = "1109--1117",
_editor = "S. Omatu et al.",
publisher = "Springer",
address = "Salamanca, Spain",
_month = jun,
year = "2009",
url = "http://iwpacbb.usal.es/",
pdf = "http://www.hutter1.net/publ/cnloh.pdf",
slides = "http://www.hutter1.net/publ/scnloh.pdf",
poster = "http://www.hutter1.net/publ/pcnloh.pdf",
http = "http://iwpacbb.usal.es/",
doi = "10.1007/978-3-642-02481-8_168",
issn = "0302-9743",
isbn = "978-3-642-02480-1",
keywords = "Bayesian regression; piecewise constant function;
change point problem; DNA copy number estimation; LOH estimation",
abstract = "SNP-microarrays are able to measure simultaneously both copy number
and genotype at several single nucleotide polymorphism positions.
Combining the two data, it is possible to better identify genomic
aberrations. For this purpose, we propose a Bayesian piecewise
constant regression which infers the type of aberration occurred,
taking into account all the possible influence in the microarray
detection of the genotype, resulting from an altered copy number
level. Namely, we model the distributions of the detected genotype
given a specific genomic alteration and we estimate the
hyper-parameters used on public reference datasets.",
support = "Swiss National Science Foundation grant 205321-112430;
Oncosuisse grants OCS-1939-8-2006 and OCS-02296-08-2008;
Cantone Ticino ``Ticino in rete'' grant;
Fondazione per la Ricerca e la Cura sui Linfomi (Lugano, Switzerland)",
}
@InProceedings{Hutter:09ldof,
author = "Ke Zhang and Marcus Hutter and Warren Jin",
title = "A New Local Distance-based Outlier Detection Approach for Scattered Real-World Data",
booktitle = "Proc. 13th Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD'09)",
series = "LNAI",
volume = "5467",
pages = "813--822",
_editor = "T. Theeramunkong and B. Kijsirikul and N. Cercone and H. T. Bao",
publisher = "Springer",
address = "Bangkok, Thailand",
_month = apr,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#ldof",
url = "http://arxiv.org/abs/0903.3257",
pdf = "http://www.hutter1.net/ai/ldof.pdf",
ps = "http://www.hutter1.net/ai/ldof.ps",
latex = "http://www.hutter1.net/ai/ldof.zip",
slides = "http://www.hutter1.net/ai/sldof.pdf",
project = "http://www.hutter1.net/official/projects.htm#???",
doi = "10.1007/978-3-642-01307-2_84",
issn =         "0302-9743",
isbn = "978-3-642-01306-5",
keywords = "local outlier; scattered data; k-distance; KNN; LOF; LDOF.",
abstract = "Detecting outliers which are grossly different from or inconsistent
with the remaining dataset is a major challenge in real-world KDD
applications. Existing outlier detection methods are ineffective on
scattered real-world datasets due to implicit data patterns and
parameter setting issues. We define a novel ``Local
Distance-based Outlier Factor'' (LDOF) to measure the outlier-ness
of objects in scattered datasets which addresses these issues. LDOF
uses the relative location of an object to its neighbours to
determine the degree to which the object deviates from its
neighbourhood.
Properties of LDOF are theoretically analysed including LDOF's lower
bound and its false-detection probability, as well as parameter
settings. In order to facilitate parameter settings in real-world
applications, we employ a top-n technique in our outlier detection
approach, where only the objects with the highest LDOF values are
regarded as outliers. Compared to conventional approaches (such as
top-n KNN and top-n LOF), our method top-n LDOF is more
effective at detecting outliers in scattered data. It is also easier
to set parameters, since its performance is relatively stable over a
large range of parameter values, as illustrated by experimental
results on both real-world and synthetic datasets.",
znote = "Acceptance rate: 111/338 = 33\%",
}
@Article{Hutter:09alttcs,
author = "Marcus Hutter and Rocco A. Servedio",
title = "{ALT'07} Special Issue",
journal = "Theoretical Computer Science",
_editor = "Marcus Hutter and Rocco A. Servedio",
volume = "410",
number = "19",
pages = "1747--1748/1912",
_month = apr,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#alttcs",
http = "http://www.sciencedirect.com/science/journal/03043975/410/19",
doi = "10.1016/j.tcs.2009.01.008",
issn = "0304-3975",
keywords = "algorithmic learning theory, special issue, preface",
abstract = "This special issue contains expanded versions of papers that appeared in
preliminary form in the proceedings of the 18th International Conference
on Algorithmic Learning Theory (ALT 2007), which was held in Sendai,
Japan during October 1--4, 2007. \emph{Algorithmic Learning Theory} is
a conference series which is dedicated to the theoretical study of the
algorithmic aspects of learning. The best papers of the conference ALT 2007
were invited for this special issue and after a thorough reviewing process,
most of them qualified for this Special Issue on Algorithmic Learning Theory
of Theoretical Computer Science. The preface contains a short introduction
to each of these papers.",
}
@Article{Hutter:09improbx,
author = "Alberto Piatti and Marco Zaffalon and Fabio Trojani and Marcus Hutter",
title = "Limits of Learning about a Categorical Latent Variable under Prior Near-Ignorance",
journal = "International Journal of Approximate Reasoning",
volume = "50",
number = "4",
pages = "597--611",
_month = apr,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#improbx",
url = "http://arxiv.org/abs/0904.4527",
pdf = "http://www.hutter1.net/ai/improbx.pdf",
ps = "http://www.hutter1.net/ai/improbx.ps",
latex = "http://www.hutter1.net/ai/improbx.tex",
slides = "http://www.hutter1.net/ai/simprob.pdf",
project = "http://www.hutter1.net/official/projects.htm#robust",
doi = "10.1016/j.ijar.2008.08.003",
issn = "0888-613X",
keywords = "Near-ignorance set of priors; Latent variables; Imprecise Dirichlet model.",
abstract = "In this paper, we consider the coherent theory of (epistemic)
uncertainty of Walley, in which beliefs are represented through sets
of probability distributions, and we focus on the problem of
modeling prior ignorance about a categorical random variable. In
this setting, it is a known result that a state of prior ignorance
is not compatible with learning. To overcome this problem, another
state of beliefs, called \emph{near-ignorance}, has been proposed.
Near-ignorance resembles ignorance very closely, by satisfying some
principles that can arguably be regarded as necessary in a state of
ignorance, and allows learning to take place. What this paper does,
is to provide new and substantial evidence that also near-ignorance
cannot be really regarded as a way out of the problem of starting
statistical inference in conditions of very weak beliefs. The key to
this result is focusing on a setting characterized by a variable of
interest that is \emph{latent}. We argue that such a setting is by
far the most common case in practice, and we provide, for the case
of categorical latent variables (and general \emph{manifest}
variables) a condition that, if satisfied, prevents learning to take
place under prior near-ignorance. This condition is shown to be
easily satisfied even in the most common statistical problems. We
regard these results as a strong form of evidence against the
possibility to adopt a condition of prior near-ignorance in real
statistical problems.",
}
@TechReport{Hutter:09bayestreex,
author = "Marcus Hutter",
title = "Exact Non-Parametric {B}ayesian Inference on Infinite Trees",
number = "0903.5342",
institution = "ARXIV",
_month = mar,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#bayestreex",
url = "http://arxiv.org/abs/0903.5342",
pdf = "http://www.hutter1.net/ai/bayestreex.pdf",
ps = "http://www.hutter1.net/ai/bayestreex.ps",
latex = "http://www.hutter1.net/ai/bayestreex.zip",
slides = "http://www.hutter1.net/ai/sbayestree.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
code = "http://www.hutter1.net/ai/bayestree.c",
keywords = "Bayesian density estimation, exact linear time algorithm,
non-parametric inference, adaptive infinite tree, Polya tree,
scale invariance, consistency, asymptotics.",
msc = "62G07; 60B10; 68W99",
abstract = "Given i.i.d. data from an unknown distribution, we consider the
problem of predicting future items. An adaptive way to estimate
the probability density is to recursively subdivide the domain to
an appropriate data-dependent granularity. A Bayesian would assign
a data-independent prior probability to ``subdivide'', which leads
to a prior over infinite(ly many) trees. We derive an exact, fast,
and simple inference algorithm for such a prior, for the data
evidence, the predictive distribution, the effective model
dimension, moments, and other quantities. We prove asymptotic
convergence and consistency results, and illustrate the behavior
of our model on some prototypical functions.",
}
@Book{Hutter:09agiproc,
editor = "Ben Goertzel and Pascal Hitzler and Marcus Hutter",
title = "Artificial General Intelligence",
subtitle = "2nd Conference ({AGI'09})",
publisher = "Atlantis Press",
address = "Arlington, USA",
_month = mar,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#agiproc09",
http = "http://www.atlantis-press.com/publications/aisr/AGI-09/",
pdf = "http://www.hutter1.net/ai/agifb09.pdf",
pdfall = "http://www.hutter1.net/ai/agiproc09.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
issn = "1951-6851",
isbn = "978-90-78677-24-6",
abstract = "The Conference on Artificial General Intelligence is the only major
conference series devoted wholly and specifically to the creation of
AI systems possessing general intelligence at the human level and
ultimately beyond. Its second installation, AGI-09, in Arlington,
Virginia, March 6-9, 2009, attracted 67 paper submissions, which is
a substantial increase from the previous year. Of these submissions,
33 (i.e., 49\%) were accepted as full papers for presentation at the
conference. Additional 13 papers were included as position papers.
The program also included a keynote address by J{\"u}rgen
Schmidhuber on \emph{The New AI}, a post-conference workshop on
\emph{The Future of AI}, and a number of pre-conference tutorials on
various topics related to AGI.",
}
@InProceedings{Hutter:09phimdp,
author = "Marcus Hutter",
title = "Feature {M}arkov Decision Processes",
booktitle = "Proc. 2nd Conf. on Artificial General Intelligence ({AGI'09})",
subtitle = "Advances in Intelligent Systems Research",
volume = "8",
pages = "61--66",
publisher = "Atlantis Press",
_address = "Arlington, Virginia",
_month = mar,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#phimdp",
url = "http://arXiv.org/abs/0812.4580",
pdf = "http://www.hutter1.net/ai/phimdp.pdf",
ps = "http://www.hutter1.net/ai/phimdp.ps",
latex = "http://www.hutter1.net/ai/phimdp.tex",
slides = "http://www.hutter1.net/ai/sphimdp.pdf",
video = "http://www.vimeo.com/7390883",
award = "http://agi-conf.org/2009/kurzweilprize.php",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.2991/agi.2009.30",
issn = "1951-6851",
isbn = "978-90-78677-24-6",
keywords = "Reinforcement learning; Markov decision process;
partial observability; feature learning; explore-exploit.",
abstract = "General purpose intelligent learning agents cycle through
(complex,non-MDP) sequences of observations, actions, and rewards.
On the other hand, reinforcement learning is well-developed for
small finite state Markov Decision Processes (MDPs). So far it is an
art performed by human designers to extract the right state
representation out of the bare observations, i.e. to reduce the
agent setup to the MDP framework. Before we can think of mechanizing
this search for suitable MDPs, we need a formal objective criterion.
The main contribution of this article is to develop such a
criterion. I also integrate the various parts into one learning
algorithm. Extensions to more realistic dynamic Bayesian networks
are developed in a companion article.",
znote = "Acceptance rate: 33/67 = 49\%. First Runner-Up for the Kurzweil Best Paper Award",
}
@InProceedings{Hutter:09phidbn,
author = "Marcus Hutter",
title = "Feature Dynamic {B}ayesian Networks",
booktitle = "Proc. 2nd Conf. on Artificial General Intelligence ({AGI'09})",
subtitle = "Advances in Intelligent Systems Research",
volume = "8",
pages = "67--73",
publisher = "Atlantis Press",
_address = "Arlington, Virginia",
_month = mar,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#phidbn",
url = "http://arXiv.org/abs/0812.4581",
pdf = "http://www.hutter1.net/ai/phidbn.pdf",
ps = "http://www.hutter1.net/ai/phidbn.ps",
latex = "http://www.hutter1.net/ai/phidbn.tex",
slides = "http://www.hutter1.net/ai/sphimdp.pdf",
video = "http://www.vimeo.com/7390883",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.2991/agi.2009.6",
issn = "1951-6851",
isbn = "978-90-78677-24-6",
keywords = "Reinforcement learning; dynamic Bayesian network;
structure learning; feature learning;
global vs. local reward; explore-exploit.",
abstract = "Feature Markov Decision Processes (PhiMDPs) are well-suited for
learning agents in general environments. Nevertheless, unstructured
(Phi)MDPs are limited to relatively simple environments. Structured
MDPs like Dynamic Bayesian Networks (DBNs) are used for large-scale
real-world problems. In this article I extend PhiMDP to PhiDBN. The
primary contribution is to derive a cost criterion that allows to
automatically extract the most relevant features from the
environment, leading to the ``best'' DBN representation. I discuss all
building blocks required for a complete general learning algorithm.",
znote = "Acceptance rate: 33/67 = 49\%",
}
@Article{Hutter:09idmx,
author = "Marcus Hutter",
title = "Practical Robust Estimators under the {I}mprecise {D}irichlet {M}odel",
journal = "International Journal of Approximate Reasoning",
volume = "50",
number = "2",
pages = "231--242",
_month = feb,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#idmx",
url = "http://arxiv.org/abs/0901.4137",
pdf = "http://www.hutter1.net/ai/idmx.pdf",
ps = "http://www.hutter1.net/ai/idmx.ps",
latex = "http://www.hutter1.net/ai/idmx.tex",
slides = "http://www.hutter1.net/ai/sidm.pdf",
project = "http://www.hutter1.net/official/projects.htm#robust",
doi = "10.1016/j.ijar.2008.03.020",
issn = "0888-613X",
keywords = "Imprecise Dirichlet Model; exact, conservative, approximate,
robust, credible interval estimates; entropy; mutual
information.",
abstract = "Walley's Imprecise Dirichlet Model (IDM) for categorical i.i.d.\
data extends the classical Dirichlet model to a set of priors. It
overcomes several fundamental problems which other approaches to
uncertainty suffer from. Yet, to be useful in practice, one needs
efficient ways for computing the imprecise=robust sets or
intervals. The main objective of this work is to derive exact,
conservative, and approximate, robust and credible interval
estimates under the IDM for a large class of statistical
estimators, including the entropy and mutual information.",
}
@Article{Hutter:09bcna,
author = "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
title = "Bayesian {DNA} Copy Number Analysis",
journal = "BMC Bioinformatics",
volume = "10",
number = "10",
pages = "1--19",
_month = jan,
year = "2009",
bibtex = "http://www.hutter1.net/official/bib.htm#bcna",
http = "http://www.biomedcentral.com/1471-2105/10/10",
supplement = "http://www.biomedcentral.com/content/supplementary/1471-2105-10-10-s2.pdf",
pdf = "http://www.hutter1.net/ai/bcna.pdf",
slides = "http://www.hutter1.net/ai/sbcna.pdf",
code = "http://www.biomedcentral.com/content/supplementary/1471-2105-10-10-s1.zip",
doi = "10.1186/1471-2105-10-10",
issn = "1471-2105",
keywords = "Bayesian regression, exact polynomial algorithm, piecewise constant function,
mBPCR, DNA copy number estimation, micro arrays, genomic aberrations.",
abstract = "Background: Some diseases, like tumors, can be related to
chromosomal aberrations, leading to changes of DNA copy number. The
copy number of an aberrant genome can be represented as a piecewise
constant function, since it can exhibit regions of deletions or
                  gains. Instead, in a healthy cell the copy number is two because we
                  inherit one copy of each chromosome from each of our parents. Bayesian
Piecewise Constant Regression (BPCR) is a Bayesian regression method
for data that are noisy observations of a piecewise constant
function. The method estimates the unknown segment number, the
endpoints of the segments and the value of the segment levels of the
underlying piecewise constant function. The Bayesian Regression
Curve (BRC) estimates the same data with a smoothing curve. However,
in the original formulation, some estimators failed to properly
determine the corresponding parameters. For example, the boundary
estimator did not take into account the dependency among the
boundaries and succeeded in estimating more than one breakpoint at
the same position, losing segments.
Results: We derived an improved version of the BPCR (called mBPCR)
and BRC, changing the segment number estimator and the boundary
estimator to enhance the fitting procedure. We also proposed an
alternative estimator of the variance of the segment levels, which
is useful in case of data with high noise. Using artificial data, we
compared the original and the modified version of BPCR and BRC with
other regression methods, showing that our improved version of BPCR
generally outperformed all the others. Similar results were also
observed on real data.
Conclusions: We propose an improved method for DNA copy number
estimation, mBPCR, which performed very well compared to previously
published algorithms. In particular, mBPCR was more powerful in the
detection of the true position of the breakpoints and of small
aberrations in very noisy data. Hence, from a biological point of
view, our method can be very useful, for example, to find targets of
genomic aberrations in clinical cancer samples.",
support = "SNF grant 205321-112430",
znote = "Marked as highly accessed.",
alt = "Also 2-page abstract and poster at 9th ISBA and 18th MASAMB meetings (2008)",
abstract2p = "http://www.hutter1.net/publ/bcnas.pdf",
poster = "http://www.hutter1.net/publ/sbcnas.pdf",
}
%-------------Publications-of-Marcus-Hutter-2008--------------%
@Article{Hutter:08actoptx,
author = "Daniil Ryabko and Marcus Hutter",
title = "On the Possibility of Learning in Reactive Environments with Arbitrary Dependence",
journal = "Theoretical Computer Science",
volume = "405",
number = "3",
pages = "274--284",
_month = oct,
year = "2008",
bibtex = "http://www.hutter1.net/official/bib.htm#actoptx",
url = "http://arxiv.org/abs/0810.5636",
pdf = "http://www.hutter1.net/ai/actoptx.pdf",
ps = "http://www.hutter1.net/ai/actoptx.ps",
latex = "http://www.hutter1.net/ai/actoptx.tex",
slides = "http://www.hutter1.net/ai/sactopt.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1016/j.tcs.2008.06.039",
issn = "0304-3975",
keywords = "Reinforcement learning, asymptotic average value,
self-optimizing policies, (non) Markov decision processes.",
abstract = "We address the problem of reinforcement learning in which
observations may exhibit an arbitrary form of stochastic dependence
on past observations and actions, i.e.\ environments more general
than (PO)MDPs. The task for an agent is to attain the best possible
asymptotic reward where the true generating environment is unknown
but belongs to a known countable family of environments. We find
some sufficient conditions on the class of environments under which
an agent exists which attains the best asymptotic reward for any
environment in the class. We analyze how tight these conditions are
and how they relate to different probabilistic assumptions known in
reinforcement learning and related fields, such as Markov Decision
Processes and mixing conditions.",
support = "SNF grant 200020-107616",
}
@InProceedings{Hutter:08phi,
author = "M. Hutter",
title = "Predictive Hypothesis Identification",
booktitle =    "Presented at 9th Valencia/ISBA 2010 Meeting",
pages = "1--16",
address = "Benidorm",
_month = sep,
year = "2008",
bibtex = "http://www.hutter1.net/official/bib.htm#phi",
url = "http://arxiv.org/abs/0809.1270",
pdf = "http://www.hutter1.net/ai/phi.pdf",
ps = "http://www.hutter1.net/ai/phi.ps",
latex = "http://www.hutter1.net/ai/phi.tex",
slides = "http://www.hutter1.net/ai/sphi.pdf",
poster = "http://www.hutter1.net/ai/pphi.pdf",
project = "http://www.hutter1.net/official/projects.htm#mdl",
keywords = "parameter estimation; hypothesis testing; model selection;
predictive inference; composite hypotheses; MAP versus ML;
moment fitting; Bayesian statistics.",
abstract = "While statistics focusses on hypothesis testing and on
estimating (properties of) the true sampling distribution, in
machine learning the performance of learning algorithms on
future data is the primary issue. In this paper we bridge the
gap with a general principle (PHI) that identifies hypotheses
with best predictive performance. This includes predictive
point and interval estimation, simple and composite hypothesis
testing, (mixture) model selection, and others as special
cases. For concrete instantiations we will recover well-known
methods, variations thereof, and new ones. PHI nicely
justifies, reconciles, and blends (a reparametrization
invariant variation of) MAP, ML, MDL, and moment estimation.
One particular feature of PHI is that it can genuinely deal
with nested hypotheses.",
}
@InProceedings{Hutter:08select,
author = "Kassel Hingee and Marcus Hutter",
title = "Equivalence of Probabilistic Tournament and Polynomial Ranking Selection",
booktitle = "Proc. 2008 Congress on Evolutionary Computation ({CEC'08})",
pages = "564--571",
publisher = "IEEE",
address =      "Hong Kong",
isbn = "978-1-4244-1823-7",
_month = jun,
year = "2008",
bibtex = "http://www.hutter1.net/official/bib.htm#select",
url = "http://arxiv.org/abs/0803.2925",
pdf = "http://www.hutter1.net/ai/select.pdf",
ps = "http://www.hutter1.net/ai/select.ps",
latex = "http://www.hutter1.net/ai/select.zip",
slides = "http://www.hutter1.net/ai/sselect.pdf",
project = "http://www.hutter1.net/official/projects.htm#optimize",
doi = "10.1109/CEC.2008.4630852",
keywords = "evolutionary algorithms, ranking selection,
tournament selection, equivalence, efficiency.",
abstract = "Crucial to an Evolutionary Algorithm's performance is its selection
scheme. We mathematically investigate the relation between
polynomial rank and probabilistic tournament methods which are
(respectively) generalisations of the popular linear ranking and
tournament selection schemes. We show that every probabilistic
tournament is equivalent to a unique polynomial rank scheme. In
fact, we derived explicit operators for translating between these
two types of selection. Of particular importance is that most linear
and most practical quadratic rank schemes are probabilistic
tournaments.",
}
@Article{Hutter:08pquestx,
author = "Daniil Ryabko and Marcus Hutter",
title = "Predicting Non-Stationary Processes",
journal = "Applied Mathematics Letters",
volume = "21",
number = "5",
pages = "477--482",
_month = may,
year = "2008",
bibtex = "http://www.hutter1.net/official/bib.htm#pquestx",
url = "http://arxiv.org/abs/cs.LG/0606077",
pdf = "http://www.hutter1.net/ai/pquestx.pdf",
ps = "http://www.hutter1.net/ai/pquestx.ps",
latex = "http://www.hutter1.net/ai/pquestx.tex",
slides = "http://www.hutter1.net/ai/spquest.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
doi = "10.1016/j.aml.2007.04.004",
issn = "0893-9659",
keywords = "sequence prediction, local absolute continuity,
non-stationary measures, average/expected criteria,
absolute/KL divergence, mixtures of measures.",
abstract = "Suppose we are given two probability measures on the set of
one-way infinite finite-alphabet sequences and consider the
question when one of the measures predicts the other, that is,
when conditional probabilities converge (in a certain sense) when
one of the measures is chosen to generate the sequence. This
question may be considered a refinement of the problem of sequence
prediction in its most general formulation: for a given class of
probability measures, does there exist a measure which predicts
all of the measures in the class? To address this problem, we find
some conditions on local absolute continuity which are sufficient
for prediction and which generalize several different notions
which are known to be sufficient for prediction. We also formulate
some open questions to outline a direction for finding the
conditions on classes of measures for which prediction is
possible.",
support = "SNF grant 200020-107616",
}
@Article{Hutter:08kolmo,
author = "Marcus Hutter",
title = "Algorithmic Complexity",
journal = "Scholarpedia",
volume = "3",
number = "1",
pages = "2573",
_month = jan,
year = "2008",
bibtex = "http://www.hutter1.net/official/bib.htm#kolmo",
http = "http://www.scholarpedia.org/article/Algorithmic_Complexity",
pdf = "http://www.hutter1.net/ai/kolmo.pdf",
ps = "http://www.hutter1.net/ai/kolmo.ps",
latex = "http://www.hutter1.net/ai/kolmo.zip",
slides = "http://www.hutter1.net/ai/sintro2kc.pdf",
video = "http://pirsa.org/displayFlash.php?id=18040109",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.4249/scholarpedia.2573",
issn = "1941-6016",
keywords = "algorithmic information theory,
prefix code, prefix Turing machine,
Universal Turing machine, Kolmogorov complexity,
plain complexity, prefix complexity.",
abstract = "The information content or complexity of an object can be measured
by the length of its shortest description. For instance the string
`01010101010101010101010101010101' has the short description ``16
repetitions of 01'', while `11001000011000011101111011101100'
presumably has no simpler description other than writing down the
string itself. More formally, the Algorithmic ``Kolmogorov''
Complexity (AC) of a string $x$ is defined as the length of the
shortest program that computes or outputs $x$, where the program is
run on some fixed reference universal computer.",
}
%-------------Publications-of-Marcus-Hutter-2007--------------%
@InProceedings{Hutter:07qlearn,
author = "Marcus Hutter and Shane Legg",
title = "Temporal Difference Updating without a Learning Rate",
booktitle = "Advances in Neural Information Processing Systems 20",
pages = "705--712",
_editor = "J.C. Platt and D. Koller and Y. Singer and S. Roweis",
publisher = "Curran Associates",
address = "Cambridge, MA, USA",
_month = dec,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#qlearn",
url = "http://arxiv.org/abs/0810.5631",
pdf = "http://www.hutter1.net/ai/qlearn.pdf",
ps = "http://www.hutter1.net/ai/qlearn.ps",
latex = "http://www.hutter1.net/ai/qlearn.zip",
poster = "http://www.hutter1.net/ai/sqlearn.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
keywords = "reinforcement learning; temporal difference;
eligibility trace; variational principle; learning rate.",
abstract = "We derive an equation for temporal difference learning from
statistical principles. Specifically, we start with the variational
principle and then bootstrap to produce an updating rule for
discounted state value estimates. The resulting equation is similar
to the standard equation for temporal difference learning with
eligibility traces, so called TD(lambda), however it lacks the
parameter alpha that specifies the learning rate. In the place
of this free parameter there is now an equation for the learning
rate that is specific to each state transition. We experimentally
test this new learning rule against TD(lambda) and find that it
offers superior performance in various settings. Finally, we make
some preliminary investigations into how to extend our new temporal
difference algorithm to reinforcement learning. To do this we
combine our update equation with both Watkins' Q(lambda) and
Sarsa(lambda) and find that it again offers superior performance
without a learning rate parameter.",
for = "080101(100%)",
znote = "Acceptance rate: 217/975 = 22\%",
}
@InProceedings{Hutter:07intest,
author = "Shane Legg and Marcus Hutter",
title = "Tests of Machine Intelligence",
booktitle = "50 Years of Artificial Intelligence",
booksubtitle = "Essays Dedicated to the 50th Anniversary of Artificial Intelligence",
address = "Monte Verita, Switzerland",
series = "LNAI",
volume = "4850",
_editor = "M. Lungarella and F. Iida and J. Bongard and R. Pfeifer",
pages = "232--242",
_month = dec,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#intest",
url = "http://arxiv.org/abs/0712.3825",
pdf = "http://www.hutter1.net/ai/intest.pdf",
ps = "http://www.hutter1.net/ai/intest.ps",
latex = "http://www.hutter1.net/ai/intest.tex",
poster = "http://www.hutter1.net/ai/siors.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
press = "http://www.hutter1.net/official/press.htm#mim",
doi = "10.1007/978-3-540-77296-5_22",
issn = "0302-9743",
isbn = "978-3-540-77295-8",
keywords = "Turing test and derivatives; Compression tests; Linguistic complexity;
Multiple cognitive abilities; Competitive games;
Psychometric tests; Smith's test; C-test; Universal intelligence",
abstract = "Although the definition and measurement of intelligence is clearly
of fundamental importance to the field of artificial intelligence,
no general survey of definitions and tests of machine intelligence
exists. Indeed few researchers are even aware of alternatives to
the Turing test and its many derivatives. In this paper we fill
this gap by providing a short survey of the many tests of machine
intelligence that have been proposed.",
support = "SNF grant 200020-107616",
}
@Article{Hutter:07iorx,
author = "Shane Legg and Marcus Hutter",
title = "Universal Intelligence: A Definition of Machine Intelligence",
volume = "17",
number = "4",
journal = "Minds \& Machines",
pages = "391--444",
_month = dec,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#iorx",
url = "http://arxiv.org/abs/0712.3329",
pdf = "http://www.hutter1.net/ai/iorx.pdf",
ps = "http://www.hutter1.net/ai/iorx.ps",
latex = "http://www.hutter1.net/ai/iorx.zip",
poster = "http://www.hutter1.net/ai/sior.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
press = "http://www.hutter1.net/official/press.htm#mim",
doi = "10.1007/s11023-007-9079-x",
issn = "0924-6495",
keywords = "AIXI, complexity theory, intelligence,
theoretical foundations, Turing test,
intelligence tests/measures/definitions",
abstract = "A fundamental problem in artificial intelligence is that nobody really
knows what intelligence is. The problem is especially acute when we
need to consider artificial systems which are significantly different
to humans. In this paper we approach this problem in the following
way: We take a number of well known informal definitions of human
intelligence that have been given by experts, and extract their
essential features. These are then mathematically formalised to
produce a general measure of intelligence for arbitrary machines. We
believe that this equation formally captures the concept of machine
intelligence in the broadest reasonable sense. We then show how this
formal definition is related to the theory of universal optimal
learning agents. Finally, we survey the many other tests and
definitions of intelligence that have been proposed for machines.",
support = "SNF grant 200020-107616",
}
@Article{Hutter:07pcregx,
author = "Marcus Hutter",
title = "Exact {B}ayesian Regression of Piecewise Constant Functions",
journal = "Bayesian Analysis",
volume = "2",
number = "4",
pages = "635--664",
_month = dec,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#pcregx",
url = "http://arxiv.org/abs/math.ST/0606315",
pdf = "http://www.hutter1.net/ai/pcregx.pdf",
ps = "http://www.hutter1.net/ai/pcregx.ps",
latex = "http://www.hutter1.net/ai/pcregx.tex",
slides = "http://www.hutter1.net/ai/spcreg.pdf",
award = "http://bayesian.org/project/lindley-prize/",
project = "http://www.hutter1.net/official/projects.htm#bayes",
code = "http://www.hutter1.net/ai/cpcreg.zip",
doi = "10.1214/07-BA225",
issn = "1936-0975",
keywords = "Bayesian regression, exact polynomial algorithm,
non-parametric inference, piecewise constant function,
dynamic programming, change point problem.",
abstract = "We derive an exact and efficient Bayesian regression algorithm for
piecewise constant functions of unknown segment number, boundary
locations, and levels. The derivation works for any noise and segment
level prior, e.g.\ Cauchy which can handle outliers. We derive
simple but good estimates for the in-segment variance. We also
propose a Bayesian regression curve as a better way of smoothing
data without blurring boundaries. The Bayesian approach also allows
straightforward determination of the evidence, break probabilities
and error estimates, useful for model selection and significance and
robustness studies. We discuss the performance on synthetic and
real-world examples. Many possible extensions are discussed.",
note = "Lindley prize for innovative research in Bayesian statistics.",
}
@Proceedings{Hutter:07altproc,
editor = "Marcus Hutter and Rocco A. Servedio and Eiji Takimoto",
title = "Algorithmic Learning Theory",
subtitle = "18th International Conference ({ALT'07})",
publisher = "Springer",
address = "Sendai, Japan",
series = "LNAI",
volume = "4754",
_month = oct,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#altproc07",
http = "http://www.springer.com/computer/ai/book/978-3-540-75224-0",
pdf = "http://www.hutter1.net/ai/altproc07.pdf",
project = "http://www.hutter1.net/official/projects.htm#other",
doi = "10.1007/978-3-540-75225-7",
issn = "0302-9743",
isbn = "978-3-540-75224-0",
keywords = "algorithmic learning theory, query models, online
learning, inductive inference, boosting, kernel methods, complexity
and learning, reinforcement learning, unsupervised learning,
grammatical inference, algorithmic forecasting.",
abstract = "The LNAI series reports state-of-the-art results in artificial
intelligence research, development, and education. This volume (LNAI
4754) contains research papers presented at the 18th International
Conference on Algorithmic Learning Theory (ALT 2007), which was held
in Sendai (Japan) during October 1-4, 2007. The main objective of
the conference was to provide an interdisciplinary forum for
high-quality talks with a strong theoretical background and
scientific interchange in areas such as query models, online
learning, inductive inference, boosting, kernel methods, complexity
and learning, reinforcement learning, unsupervised learning,
grammatical inference, and algorithmic forecasting. The conference
was co-located with the 10th International Conference on Discovery
Science (DS 2007). The volume includes 25 technical contributions
that were selected from 50 submissions, and five invited talks
presented to the audience of ALT and DS. Longer versions of the
DS invited papers are available in the proceedings of DS 2007.",
znote = "Acceptance rate: 25/50 = 50\%",
}
@InProceedings{Hutter:07altintro,
author = "Marcus Hutter and Rocco A. Servedio and Eiji Takimoto",
title = "Algorithmic Learning Theory 2007: Editors' Introduction",
booktitle = "Proc. 18th International Conf. on Algorithmic Learning Theory ({ALT'07})",
address = "Sendai, Japan",
series = "LNAI",
volume = "4754",
publisher = "Springer",
pages = "1--8",
_month = oct,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#altintro07",
pdf = "http://www.hutter1.net/ai/altintro07.pdf",
ps = "http://www.hutter1.net/ai/altintro07.ps",
latex = "http://www.hutter1.net/ai/altintro07.tex",
project = "http://www.hutter1.net/official/projects.htm#other",
issn = "0302-9743",
isbn = "3-540-75224-2",
doi = "10.1007/978-3-540-75225-7_1",
keywords = "algorithmic learning theory, query models, online
learning, inductive inference, boosting, kernel methods, complexity
and learning, reinforcement learning, unsupervised learning,
grammatical inference, algorithmic forecasting.",
abstract = "Learning theory is an active research area that incorporates ideas,
problems, and techniques from a wide range of disciplines including
statistics, artificial intelligence, information theory, pattern
recognition, and theoretical computer science. The research reported
at the 18th International Conference on Algorithmic Learning Theory
(ALT 2007) ranges over areas such as unsupervised learning,
inductive inference, complexity and learning, boosting and
reinforcement learning, query learning models, grammatical
inference, online learning and defensive forecasting, and kernel
methods. In this introduction we give an overview of the five
invited talks and the regular contributions of ALT 2007.",
}
@Article{Hutter:07uspx,
author = "Marcus Hutter",
title = "On Universal Prediction and {B}ayesian Confirmation",
journal = "Theoretical Computer Science",
volume = "384",
number = "1",
pages = "33--48",
_month = sep,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#uspx",
url = "http://arxiv.org/abs/0709.1516",
pdf = "http://www.hutter1.net/ai/uspx.pdf",
ps = "http://www.hutter1.net/ai/uspx.ps",
latex = "http://www.hutter1.net/ai/uspx.tex",
slides = "http://www.hutter1.net/ai/susp.pdf",
poster = "http://www.hutter1.net/ai/susps.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
doi = "10.1016/j.tcs.2007.05.016",
issn = "0304-3975",
keywords = "Sequence prediction, Bayes, Solomonoff prior,
Kolmogorov complexity, Occam's razor, prediction bounds,
model classes, philosophical issues, symmetry principle,
confirmation theory, reparametrization invariance,
old-evidence/updating problem, (non)computable environments.",
abstract = "The Bayesian framework is a well-studied and successful framework
for inductive reasoning, which includes hypothesis testing and
confirmation, parameter estimation, sequence prediction,
classification, and regression. But standard statistical guidelines
for choosing the model class and prior are not always available or
fail, in particular in complex situations.
Solomonoff completed the Bayesian framework by providing a
rigorous, unique, formal, and universal choice for the model class
and the prior. We discuss in breadth how and in which sense
universal (non-i.i.d.) sequence prediction solves various
(philosophical) problems of traditional Bayesian sequence
prediction. We show that Solomonoff's model possesses many
desirable properties: Strong total and weak instantaneous bounds,
and in contrast to most classical continuous prior densities has
no zero p(oste)rior problem, i.e. can confirm universal
hypotheses, is reparametrization and regrouping invariant, and
avoids the old-evidence and updating problem. It even performs
well (actually better) in non-computable environments.",
}
@Article{Hutter:07mlconvxx,
author = "Marcus Hutter and Andrej A. Muchnik",
title = "On Semimeasures Predicting {Martin-L{\"o}f} Random Sequences",
journal = "Theoretical Computer Science",
volume = "382",
number = "3",
pages = "247--261",
_month = sep,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#mlconvxx",
url = "http://arxiv.org/abs/0708.2319",
pdf = "http://www.hutter1.net/ai/mlconvxx.pdf",
ps = "http://www.hutter1.net/ai/mlconvxx.ps",
latex = "http://www.hutter1.net/ai/mlconvxx.tex",
slides = "http://www.hutter1.net/ai/smlconvx.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1016/j.tcs.2007.03.040",
issn = "0304-3975",
keywords = "Sequence prediction; Algorithmic Information Theory;
universal enumerable semimeasure; mixture distributions;
posterior convergence; Martin-L{\"o}f randomness;
quasimeasures.",
abstract = "Solomonoff's central result on induction is that the posterior of
a universal semimeasure M converges rapidly and with probability
1 to the true sequence generating posterior mu, if the latter is
computable. Hence, M is eligible as a universal sequence predictor
in case of unknown mu. Despite some nearby results and proofs in
the literature, the stronger result of convergence for all
(Martin-Loef) random sequences remained open. Such a convergence
result would be particularly interesting and natural, since
randomness can be defined in terms of M itself. We show that there
are universal semimeasures M which do not converge for all random
sequences, i.e. we give a partial negative answer to the open
problem. We also provide a positive answer for some non-universal
semimeasures. We define the incomputable measure D as a mixture
over all computable measures and the enumerable semimeasure W as a
mixture over all enumerable nearly-measures. We show that W
converges to D and D to mu on all random sequences. The Hellinger
distance measuring closeness of two distributions plays
a central role.",
support = "SNF grant 2100-67712 and RFBR grants N04-01-00427 and N02-01-22001",
}
@Article{Hutter:07algprob,
author = "Marcus Hutter and Shane Legg and Paul M. B. Vit{\'a}nyi",
title = "Algorithmic Probability",
journal = "Scholarpedia",
volume = "2",
number = "8",
pages = "2572",
_month = aug,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#algprob",
http = "http://www.scholarpedia.org/article/Algorithmic_Probability",
pdf = "http://www.hutter1.net/ai/algprob.pdf",
ps = "http://www.hutter1.net/ai/algprob.ps",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.4249/scholarpedia.2572",
issn = "1941-6016",
keywords = "algorithmic information theory,
algorithmic complexity,
discrete/continuous algorithmic probability,
Bayes, Occam, Epicurus,
applications, references",
abstract = "Algorithmic ``Solomonoff'' Probability (AP) assigns to objects an a
priori probability that is in some sense universal. This prior
distribution has theoretical applications in a number of areas,
including inductive inference theory and the time complexity
analysis of algorithms. Its main drawback is that it is not
computable and thus can only be approximated in practice.",
}
@InProceedings{Hutter:07improb,
author = "Alberto Piatti and Marco Zaffalon and Fabio Trojani and Marcus Hutter",
title = "Learning about a Categorical Latent Variable under Prior Near-Ignorance",
booktitle = "Proc. 5th International Symposium on
Imprecise Probability: Theories and Applications ({ISIPTA'07})",
pages = "357--364",
_editor = "G. de Cooman and J. Vejnarova and M. Zaffalon",
publisher = "Action M Agency",
address = "Prague, Czech Republic",
_month = jul,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#improb",
url = "http://arxiv.org/abs/0705.4312",
pdf = "http://www.hutter1.net/ai/improb.pdf",
ps = "http://www.hutter1.net/ai/improb.ps",
latex = "http://www.hutter1.net/ai/improb.tex",
slides = "http://www.hutter1.net/ai/simprob.pdf",
project = "http://www.hutter1.net/official/projects.htm#robust",
code = "http://www.hutter1.net/ai/improb.cpp",
isbn = "978-80-86742-20-5",
keywords = "Prior near-ignorance, latent and manifest variables,
observational processes, vacuous beliefs, imprecise probabilities.",
abstract = "It is well known that complete prior ignorance is not compatible
with learning, at least in a coherent theory of (epistemic)
uncertainty. What is less widely known, is that there is a state
similar to full ignorance, that Walley calls \emph{near-ignorance},
that permits learning to take place. In this paper we provide new
and substantial evidence that also near-ignorance cannot be really
regarded as a way out of the problem of starting statistical
inference in conditions of very weak beliefs. The key to this result
is focusing on a setting characterized by a variable of interest
that is \emph{latent}. We argue that such a setting is by far the
most common case in practice, and we show, for the case of
categorical latent variables (and general \emph{manifest} variables)
that there is a sufficient condition that, if satisfied, prevents
learning to take place under prior near-ignorance. This condition is
shown to be easily satisfied in the most common statistical
problems.",
znote = "Acceptance rate: 48/70 = 68\%",
}
@InProceedings{Hutter:07pcreg,
author = "Marcus Hutter",
title = "{B}ayesian Regression of Piecewise Constant Functions",
booktitle = "Proc. ISBA 8th International Meeting on Bayesian Statistics",
address = "Benidorm, Spain",
_editor = "J.M. Bernardo and M.J. Bayarri and J.O. Berger and
A.P. Dawid and D. Heckerman and A.F.M. Smith and M. West",
publisher = "Oxford University Press",
pages = "607--612",
_month = jul,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#pcreg",
url = "http://arxiv.org/abs/math.ST/0606315",
pdf = "http://www.hutter1.net/ai/pcreg.pdf",
ps = "http://www.hutter1.net/ai/pcreg.ps",
latex = "http://www.hutter1.net/ai/pcreg.tex",
slides = "http://www.hutter1.net/ai/spcreg.pdf",
award = "http://bayesian.org/project/lindley-prize/",
project = "http://www.hutter1.net/official/projects.htm#bayes",
ccode = "http://www.hutter1.net/ai/pcreg.cpp",
rcode = "http://www.hutter1.net/ai/cpcreg.zip",
isbn = "978-0-19-921465-5",
abstract = "We derive an exact and efficient Bayesian regression algorithm for
piecewise constant functions of unknown segment number, boundary
location, and levels. It works for any noise and segment level
prior, e.g.\ Cauchy which can handle outliers. We derive simple but
good estimates for the in-segment variance. We also propose a
Bayesian regression curve as a better way of smoothing data without
blurring boundaries. The Bayesian approach also allows
straightforward determination of the evidence, break probabilities
and error estimates, useful for model selection and significance and
robustness studies. We briefly mention the performance on synthetic
and real-world examples. The full version of the paper contains
detailed derivations, more motivation and discussion, the complete
algorithm, the experiments, and various extensions.",
keywords = "Bayesian regression, exact polynomial algorithm, non-parametric
inference, piecewise constant function, dynamic programming,
change point problem.",
note = "Lindley prize for innovative research in Bayesian statistics.",
znote = "Acceptance rate: 19/326 = 6\%.",
}
@InProceedings{Hutter:07pquest,
author = "Daniil Ryabko and Marcus Hutter",
title = "On Sequence Prediction for Arbitrary Measures",
booktitle = "Proc. IEEE International Symposium on Information Theory ({ISIT'07})",
pages = "2346--2350",
_editor = "A. Goldsmith and M. Medard and A. Shokrollahi and R. Zamir",
publisher = "IEEE",
address = "Nice, France",
_month = jun,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#pquest",
url = "http://arxiv.org/abs/cs.LG/0606077",
pdf = "http://www.hutter1.net/ai/pquest.pdf",
ps = "http://www.hutter1.net/ai/pquest.ps",
latex = "http://www.hutter1.net/ai/pquest.tex",
slides = "http://www.hutter1.net/ai/spquest.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
doi = "10.1109/ISIT.2007.4557570",
isbn = "1-4244-1429-6",
keywords = "sequence prediction, local absolute continuity,
non-stationary measures, average/expected criteria,
absolute/KL divergence, mixtures of measures.",
abstract = "Suppose we are given two probability measures on the set of
one-way infinite finite-alphabet sequences. Consider the
question when one of the measures predicts the other, that is,
when conditional probabilities converge (in a certain sense), if
one of the measures is chosen to generate the sequence. This
question may be considered a refinement of the problem of sequence
prediction in its most general formulation: for a given class of
probability measures, does there exist a measure which predicts
all of the measures in the class? To address this problem, we find
some conditions on local absolute continuity which are sufficient
for prediction and generalize several different notions
that are known to be sufficient for prediction. We also formulate
some open questions to outline a direction for finding the
conditions on classes of measures for which prediction is
possible.",
support = "SNF grant 200020-107616",
}
@InProceedings{Hutter:07idefs,
author = "Shane Legg and Marcus Hutter",
title = "A Collection of Definitions of Intelligence",
booktitle = "Advances in Artificial General Intelligence: Concepts, Architectures and Algorithms",
series = "Frontiers in Artificial Intelligence and Applications",
volume = "157",
pages = "17--24",
editor = "B. Goertzel and P. Wang",
publisher = "IOS Press",
address = "Amsterdam, NL",
_month = jun,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#idefs",
url = "http://arxiv.org/abs/0706.3639",
http = "http://www.idsia.ch/~shane/intelligence.html",
pdf = "http://www.hutter1.net/ai/idefs.pdf",
ps = "http://www.hutter1.net/ai/idefs.ps",
latex = "http://www.hutter1.net/ai/idefs.tex",
project = "http://www.hutter1.net/official/projects.htm#uai",
isbn = "978-1-58603-758-1",
issn = "0922-6389",
keywords = "intelligence definitions, collective, psychologist,
artificial, universal",
abstract = "This chapter is a survey of a large number of informal definitions
of ``intelligence'' that the authors have collected over the years.
Naturally, compiling a complete list would be impossible as many
definitions of intelligence are buried deep inside articles and
books. Nevertheless, the 70-odd definitions presented here are, to
the authors' knowledge, the largest and most well referenced
collection there is.",
support = "SNF grant 200020-107616",
}
@InProceedings{Hutter:07lorp,
author = "Marcus Hutter",
title = "The Loss Rank Principle for Model Selection",
booktitle = "Proc. 20th Annual Conf. on Learning Theory ({COLT'07})",
address = "San Diego, USA",
series = "LNAI",
volume = "4539",
_editor = "N. Bshouty and C. Gentile",
publisher = "Springer",
pages = "589--603",
_month = jun,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#lorp",
url = "http://arxiv.org/abs/math.ST/0702804",
pdf = "http://www.hutter1.net/ai/lorp.pdf",
ps = "http://www.hutter1.net/ai/lorp.ps",
latex = "http://www.hutter1.net/ai/lorp.tex",
slides = "http://www.hutter1.net/ai/slorp.pdf",
project = "http://www.hutter1.net/official/projects.htm#mdl",
doi = "10.1007/978-3-540-72927-3_42",
issn = "0302-9743",
keywords = "Model selection, loss rank principle,
non-parametric regression, classification,
general loss function, k nearest neighbors.",
abstract = "We introduce a new principle for model selection in regression and
classification. Many regression models are controlled by some
smoothness or flexibility or complexity parameter c, e.g. the number
of neighbors to be averaged over in k nearest neighbor (kNN)
regression or the polynomial degree in regression with polynomials.
Let f_D^c be the (best) regressor of complexity c on data D. A more
flexible regressor can fit more data D' well than a more rigid one.
If something (here small loss) is easy to achieve it's typically
worth less. We define the loss rank of f_D^c as the number of other
(fictitious) data D' that are fitted better by f_D'^c than D is
fitted by f_D^c. We suggest selecting the model complexity c that
has minimal loss rank (LoRP). Unlike most penalized maximum
likelihood variants (AIC,BIC,MDL), LoRP only depends on the
regression function and loss function. It works without a stochastic noise
model, and is directly applicable to any non-parametric regressor,
like kNN. In this paper we formalize, discuss, and motivate LoRP,
study it for specific regression problems, in particular linear
ones, and compare it to other model selection schemes.",
znote = "Acceptance rate: 41/92 = 45\%",
}
@Article{Hutter:07ait,
author = "Marcus Hutter",
title = "Algorithmic Information Theory: a brief non-technical guide to the field",
journal = "Scholarpedia",
volume = "2",
number = "3",
pages = "2519",
_month = mar,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#ait",
http = "http://www.scholarpedia.org/article/Algorithmic_Information_Theory",
url = "http://arxiv.org/abs/cs.IT/0703024",
pdf = "http://www.hutter1.net/ai/ait.pdf",
ps = "http://www.hutter1.net/ai/ait.ps",
latex = "http://www.hutter1.net/ai/ait.zip",
slides = "http://www.hutter1.net/ai/sapplait.pdf",
video1 = "https://pirsa.org/18040109",
video2 = "http://youtu.be/WZvxAA8ZdD4",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.4249/scholarpedia.2519",
issn = "1941-6016",
keywords = "Algorithmic information theory,
algorithmic ``Kolmogorov'' complexity,
algorithmic ``Solomonoff'' probability,
universal ``Levin'' search,
algorithmic ``Martin-Loef'' randomness,
applications, history, references, notation, nomenclature, map.",
abstract = "This article is a brief guide to the field of algorithmic
information theory (AIT), its underlying philosophy, and the most
important concepts. AIT arises by mixing information theory and
computation theory to obtain an objective and absolute notion of
information in an individual object, and in so doing gives rise to
an objective and robust notion of randomness of individual objects.
This is in contrast to classical information theory that is based on
random variables and communication, and has no bearing on
information and randomness of individual objects. After a brief
overview, the major subfields, applications, history, and a map of
the field are presented.",
}
@Article{Hutter:07postbndx,
author = "Alexey Chernov and Marcus Hutter and J{\"u}rgen Schmidhuber",
title = "Algorithmic Complexity Bounds on Future Prediction Errors",
journal = "Information and Computation",
volume = "205",
number = "2",
pages = "242--261",
_month = feb,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#postbndx",
url = "http://arxiv.org/abs/cs.LG/0701120",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT05/alt05.jhtml",
pdf = "http://www.hutter1.net/ai/postbndx.pdf",
ps = "http://www.hutter1.net/ai/postbndx.ps",
latex = "http://www.hutter1.net/ai/postbndx.tex",
slides = "http://www.hutter1.net/ai/spostbnd.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1016/j.ic.2006.10.004",
issn = "0890-5401",
keywords = "Kolmogorov complexity, posterior bounds, online sequential prediction,
Solomonoff prior, monotone conditional complexity, total error,
future loss, randomness deficiency",
abstract = "We bound the future loss when predicting any (computably) stochastic
sequence online. Solomonoff finitely bounded the total deviation
of his universal predictor $M$ from the true distribution $\mu$ by
the algorithmic complexity of $\mu$. Here we assume we are at a
time $t>1$ and already observed $x=x_1...x_t$. We bound the future
prediction performance on $x_{t+1}x_{t+2}...$ by a new variant of
algorithmic complexity of $\mu$ given $x$, plus the complexity of
the randomness deficiency of $x$. The new complexity is monotone
in its condition in the sense that this complexity can only
decrease if the condition is prolonged. We also briefly discuss
potential generalizations to Bayesian model classes and to
classification problems.",
support = "SNF grant 2000-61847",
}
@InCollection{Hutter:07aixigentle,
author = "Marcus Hutter",
title = "Universal Algorithmic Intelligence: A Mathematical Top$\rightarrow$Down Approach",
booktitle = "Artificial General Intelligence",
_editor = "B. Goertzel and C. Pennachin",
publisher = "Springer",
address = "Berlin",
_series = "Cognitive Technologies",
pages = "227--290",
_month = jan,
year = "2007",
bibtex = "http://www.hutter1.net/official/bib.htm#aixigentle",
http = "http://www.hutter1.net/ai/aixigentle.htm",
url = "http://arxiv.org/abs/cs.AI/0701125",
pdf = "http://www.hutter1.net/ai/aixigentle.pdf",
ps = "http://www.hutter1.net/ai/aixigentle.ps",
latex = "http://www.hutter1.net/ai/aixigentle.tex",
slides = "http://www.hutter1.net/ai/saixigentle.pdf",
video = "http://vimeo.com/14888930",
project = "http://www.hutter1.net/official/projects.htm#uai",
press = "http://www.hutter1.net/official/press.htm#uaibook",
doi = "10.1007/978-3-540-68677-4_8",
isbn = "3-540-23733-X",
categories = "I.2. [Artificial Intelligence]",
keywords = "Artificial intelligence; algorithmic probability;
sequential decision theory; rational agents;
value function; Solomonoff induction;
Kolmogorov complexity; reinforcement learning;
universal sequence prediction; strategic games;
function minimization; supervised learning.",
abstract = "Decision theory formally solves the problem of rational agents in
uncertain worlds if the true environmental prior probability
distribution is known. Solomonoff's theory of universal induction
formally solves the problem of sequence prediction for unknown
prior distribution. We combine both ideas and get a parameter-free
theory of universal Artificial Intelligence. We give strong
arguments that the resulting AIXI model is the most intelligent
unbiased agent possible. We outline for a number of problem
classes, including sequence prediction, strategic games, function
minimization, reinforcement and supervised learning, how the AIXI
model can formally solve them. The major drawback of the AIXI
model is that it is uncomputable. To overcome this problem, we
construct a modified algorithm AIXI$tl$ that is still
effectively more intelligent than any other time $t$ and length $l$
bounded agent. The computation time of AIXI$tl$ is of the order $t
\cdot 2^l$. Other discussed topics are formal definitions of
intelligence order relations, the horizon problem and relations of
the AIXI theory to other AI approaches.",
}
%-------------Publications-of-Marcus-Hutter-2006--------------%
@Article{Hutter:06unipriorx,
author = "Marcus Hutter",
title = "On Generalized Computable Universal Priors and their Convergence",
journal = "Theoretical Computer Science",
volume = "364",
number = "1",
pages = "27--41",
_month = nov,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#unipriorx",
url = "http://arxiv.org/abs/cs.LG/0503026",
pdf = "http://www.hutter1.net/ai/unipriorx.pdf",
ps = "http://www.hutter1.net/ai/unipriorx.ps",
latex = "http://www.hutter1.net/ai/unipriorx.tex",
slides = "http://www.hutter1.net/ai/sunipriors.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1016/j.tcs.2006.07.039",
issn = "0304-3975",
keywords = "Sequence prediction; Algorithmic Information Theory;
Solomonoff's prior; universal probability;
mixture distributions; posterior convergence;
computability concepts; Martin-Loef randomness.",
abstract = "Solomonoff unified Occam's razor and Epicurus' principle of
multiple explanations to one elegant, formal, universal theory of
inductive inference, which initiated the field of algorithmic
information theory. His central result is that the posterior of
the universal semimeasure M converges rapidly to the true sequence
generating posterior mu, if the latter is computable. Hence, M is
eligible as a universal predictor in case of unknown mu. The first
part of the paper investigates the existence and convergence of
computable universal (semi)measures for a hierarchy of
computability classes: recursive, estimable, enumerable, and
approximable. For instance, M is known to be enumerable, but
not estimable, and to dominate all enumerable semimeasures. We
present proofs for discrete and continuous semimeasures. The
second part investigates more closely the types of convergence,
possibly implied by universality: in difference and in ratio, with
probability 1, in mean sum, and for Martin-Loef random sequences.
We introduce a generalized concept of randomness for individual
sequences and use it to exhibit difficulties regarding these
issues. In particular, we show that convergence fails (holds) on
generalized-random sequences in gappy (dense) Bernoulli classes.",
}
@Article{Hutter:06fuo,
author = "Marcus Hutter and Shane Legg",
title = "Fitness Uniform Optimization",
journal = "IEEE Transactions on Evolutionary Computation",
volume = "10",
number = "5",
pages = "568--589",
_month = oct,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#fuo",
url = "http://arxiv.org/abs/cs.NE/0610126",
pdf = "http://www.hutter1.net/ai/fuo.pdf",
ps = "http://www.hutter1.net/ai/fuo.ps",
latex = "http://www.hutter1.net/ai/fuo.zip",
slides = "http://www.hutter1.net/ai/sfuss.pdf",
project = "http://www.hutter1.net/official/projects.htm#optimize",
press = "http://www.hutter1.net/official/press.htm#fuss",
doi = "10.1109/TEVC.2005.863127",
issn = "1089-778X",
keywords = "Evolutionary algorithms, fitness uniform selection scheme, fitness
uniform deletion scheme, preserve diversity, local optima, evolution,
universal similarity relation, correlated recombination, fitness tree
model, traveling salesman, set covering, satisfiability.",
abstract = "In evolutionary algorithms, the fitness of a population increases with
time by mutating and recombining individuals and by a biased selection
of more fit individuals. The right selection pressure is critical in
ensuring sufficient optimization progress on the one hand and in
preserving genetic diversity to be able to escape from local optima on
the other hand. Motivated by a universal similarity relation on the
individuals, we propose a new selection scheme, which is uniform in
the fitness values. It generates selection pressure toward sparsely
populated fitness regions, not necessarily toward higher fitness, as
is the case for all other selection schemes. We show analytically on a
simple example that the new selection scheme can be much more
effective than standard selection schemes. We also propose a new
deletion scheme which achieves a similar result via deletion and show
how such a scheme preserves genetic diversity more effectively than
standard approaches. We compare the performance of the new schemes to
tournament selection and random deletion on an artificial deceptive
problem and a range of NP-hard problems: traveling salesman, set
covering and satisfiability.",
}
@InProceedings{Hutter:06discount,
author = "Marcus Hutter",
title = "General Discounting versus Average Reward",
booktitle = "Proc. 17th International Conf. on Algorithmic Learning Theory ({ALT'06})",
address = "Barcelona, Spain",
series = "LNAI",
volume = "4264",
_editor = "Jose L. Balcázar and Phil Long and Frank Stephan",
publisher = "Springer",
pages = "244--258",
_month = oct,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#discount",
url = "http://arxiv.org/abs/cs.LG/0605040",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT06/alt06.jhtml",
pdf = "http://www.hutter1.net/ai/discount.pdf",
ps = "http://www.hutter1.net/ai/discount.ps",
latex = "http://www.hutter1.net/ai/discount.tex",
slides = "http://www.hutter1.net/ai/sdiscount.pdf",
project = "http://www.hutter1.net/official/projects.htm#rl",
issn = "0302-9743",
isbn = "3-540-46649-5",
doi = "10.1007/11894841_21",
keywords = "reinforcement learning; average value;
discounted value; arbitrary environment;
arbitrary discount sequence; effective horizon;
increasing farsightedness; consistent behavior.",
abstract = "Consider an agent interacting with an environment in cycles. In
every interaction cycle the agent is rewarded for its performance.
We compare the average reward U from cycle 1 to m (average
value) with the future discounted reward V from cycle k to
infinity (discounted value). We consider essentially arbitrary
(non-geometric) discount sequences and arbitrary reward sequences
(non-MDP environments). We show that asymptotically U for
m->infinity and V for k->infinity are equal, provided both
limits exist. Further, if the effective horizon grows linearly
with k or faster, then existence of the limit of U implies
that the limit of V exists. Conversely, if the effective horizon
grows linearly with k or slower, then existence of the limit of
V implies that the limit of U exists.",
znote = "Acceptance rate: 24/53 = 45\%",
}
@InProceedings{Hutter:06actopt,
author = "Daniil Ryabko and Marcus Hutter",
title = "Asymptotic Learnability of Reinforcement Problems with Arbitrary Dependence",
booktitle = "Proc. 17th International Conf. on Algorithmic Learning Theory ({ALT'06})",
address = "Barcelona, Spain",
series = "LNAI",
volume = "4264",
_editor = "Jose L. Balcázar and Phil Long and Frank Stephan",
publisher = "Springer",
pages = "334--347",
_month = oct,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#actopt",
url = "http://arxiv.org/abs/cs.LG/0603110",
conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT06/alt06.jhtml",
pdf = "http://www.hutter1.net/ai/actopt.pdf",
ps = "http://www.hutter1.net/ai/actopt.ps",
latex = "http://www.hutter1.net/ai/actopt.tex",
slides = "http://www.hutter1.net/ai/sactopt.pdf",
project = "http://www.hutter1.net/official/projects.htm#universal",
press = "http://www.hutter1.net/official/press.htm#universal",
issn = "0302-9743",
isbn = "3-540-46649-5",
doi = "10.1007/11894841_27",
keywords = "Reinforcement learning, asymptotic average value,
self-optimizing policies, (non) Markov decision processes.",
abstract = "We address the problem of reinforcement
learning in which observations may exhibit an arbitrary form of
stochastic dependence on past observations and actions,
i.e. environments more general than (PO)MDPs.
The task for an agent is to attain the best possible asymptotic
reward where the true generating environment is unknown but
belongs to a known countable family of environments. We find some
sufficient conditions on the class of environments under which an
agent exists which attains the best asymptotic reward for any
environment in the class. We analyze how tight these conditions
are and how they relate to different probabilistic assumptions
known in reinforcement learning and related fields, such as Markov
Decision Processes and mixing conditions.",
znote = "Acceptance rate: 24/53 = 45\%",
}
@Misc{Hutter:06hprize,
author = "Marcus Hutter",
title = "Human Knowledge Compression Prize",
_month = aug,
year = "2006/2020",
bibtex = "http://www.hutter1.net/official/bib.htm#hprize",
project = "http://www.hutter1.net/prize/index.htm",
press = "http://www.hutter1.net/official/press.htm#hprize",
keywords = "Wikipedia; artificial intelligence;
lossless data compression; 50'000€/500'000€.",
abstract = "Being able to compress well is closely related to intelligence.
While intelligence is a slippery concept, file sizes are hard
numbers. The intention of this prize is to give incentives for
advancing the field of Artificial Intelligence through the
compression of human knowledge. The better one can compress the
encyclopedia Wikipedia, the better one can predict; and being able
to predict well is key for being able to act intelligently.",
note = "open ended, http://prize.hutter1.net/",
for = "080401(80\%),080199(20\%)",
}
@Article{Hutter:06mdlspeedx,
author = "Jan Poland and Marcus Hutter",
title = "{MDL} Convergence Speed for {B}ernoulli Sequences",
journal = "Statistics and Computing",
volume = "16",
number = "2",
pages = "161--175",
_month = jun,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#mdlspeedx",
url = "http://arxiv.org/abs/math.ST/0602505",
pdf = "http://www.hutter1.net/ai/mdlspeedx.pdf",
ps = "http://www.hutter1.net/ai/mdlspeedx.ps",
latex = "http://www.hutter1.net/ai/mdlspeedx.tex",
slides = "http://www.hutter1.net/ai/smdlspeed.pdf",
slidesppt = "http://www.hutter1.net/ai/smdlspeed.ppt",
project = "http://www.hutter1.net/official/projects.htm#mdl",
issn = "0960-3174",
doi = "10.1007/s11222-006-6746-3",
keywords = "MDL, Minimum Description Length, Convergence Rate,
Prediction, Bernoulli, Discrete Model Class.",
abstract = "The Minimum Description Length principle for online sequence
estimation/prediction in a proper learning setup is studied. If
the underlying model class is discrete, then the total expected
square loss is a particularly interesting performance measure: (a)
this quantity is finitely bounded, implying convergence with
probability one, and (b) it additionally specifies the convergence
speed. For MDL, in general one can only have loss bounds which are
finite but exponentially larger than those for Bayes mixtures. We
show that this is even the case if the model class contains only
Bernoulli distributions. We derive a new upper bound on the
prediction error for countable Bernoulli classes. This implies a
small bound (comparable to the one for Bayes mixtures) for certain
important model classes. We discuss the application to Machine
Learning tasks such as classification and hypothesis testing, and
generalization to countable classes of i.i.d. models.",
}
@InProceedings{Hutter:06usp,
author = "Marcus Hutter",
title = "On the Foundations of Universal Sequence Prediction",
booktitle = "Proc. 3rd Annual Conference on Theory and
Applications of Models of Computation ({TAMC'06})",
volume = "3959",
series = "LNCS",
pages = "408--420",
_editor = "J.-Y. Cai and S. B. Cooper and A. Li",
publisher = "Springer",
_address = "Beijing",
_month = may,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#usp",
url = "http://arxiv.org/abs/cs.LG/0605009",
conf = "http://gcl.iscas.ac.cn/accl06/TAMC06_Home.htm",
pdf = "http://www.hutter1.net/ai/usp.pdf",
ps = "http://www.hutter1.net/ai/usp.ps",
latex = "http://www.hutter1.net/ai/usp.tex",
slides = "http://www.hutter1.net/ai/susp.pdf",
poster = "http://www.hutter1.net/ai/susps.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
issn = "0302-9743",
isbn = "3-540-34021-1",
doi = "10.1007/11750321_39",
keywords = "Sequence prediction, Bayes, Solomonoff prior,
Kolmogorov complexity, Occam's razor, prediction bounds,
model classes, philosophical issues, symmetry principle,
confirmation theory, reparametrization invariance,
old-evidence/updating problem, (non)computable environments.",
abstract = "Solomonoff completed the Bayesian framework by providing a
rigorous, unique, formal, and universal choice for the model class
and the prior. We discuss in breadth how and in which sense
universal (non-i.i.d.) sequence prediction solves various
(philosophical) problems of traditional Bayesian sequence
prediction. We show that Solomonoff's model possesses many
desirable properties: Fast convergence and strong bounds, and in
contrast to most classical continuous prior densities has no zero
p(oste)rior problem, i.e. can confirm universal hypotheses, is
reparametrization and regrouping invariant, and avoids the
old-evidence and updating problem. It even performs well (actually
better) in non-computable environments.",
znote = "Acceptance rate: 76/400 = 19\%",
alt = "Also 2-page abstract and poster at 9th ISBA World Meeting (2008)",
abstract2p = "http://www.hutter1.net/ai/usps.pdf",
}
@InProceedings{Hutter:06aixifoe,
author = "Jan Poland and Marcus Hutter",
title = "Universal Learning of Repeated Matrix Games",
booktitle = "Proc. 15th Annual Machine Learning Conf. of {B}elgium and {T}he {N}etherlands ({Benelearn'06})",
pages = "7--14",
address = "Ghent, Belgium",
_editor = "Yvan Saeys and Bernard De Baets and Elena Tsiporkova and Yves Van de Peer",
xpublisher = "",
_month = may,
year = "2006",
isbn = "90 382 0948 7",
bibtex = "http://www.hutter1.net/official/bib.htm#aixifoe",
url = "http://arxiv.org/abs/cs.LG/0508073",
conf = "http://bioinformatics.psb.ugent.be/benelearn2006/",
pdf = "http://www.hutter1.net/ai/aixifoe.pdf",
ps = "http://www.hutter1.net/ai/aixifoe.ps",
latex = "http://www.hutter1.net/ai/aixifoe.zip",
slides = "http://www.hutter1.net/ai/saixifoe.pdf",
project = "http://www.hutter1.net/official/projects.htm#expert",
abstract = "We study and compare the learning dynamics of two universal
learning algorithms, one based on Bayesian learning and the
other on prediction with expert advice. Both approaches have
strong asymptotic performance guarantees. When confronted with
the task of finding good long-term strategies in repeated
2 x 2 matrix games, they behave quite differently. We consider
the case where the learning algorithms are not even informed
about the game they are playing.",
}
@InProceedings{Hutter:06ior,
author = "Shane Legg and Marcus Hutter",
title = "A Formal Measure of Machine Intelligence",
booktitle = "Proc. 15th Annual Machine Learning Conference of {B}elgium and {T}he {N}etherlands ({Benelearn'06})",
pages = "73--80",
address = "Ghent, Belgium",
_editor = "Yvan Saeys and Bernard De Baets and Elena Tsiporkova and Yves Van de Peer",
_month = may,
year = "2006",
isbn = "90 382 0948 7",
bibtex = "http://www.hutter1.net/official/bib.htm#ior",
url = "http://arxiv.org/abs/cs.AI/0605024",
conf = "http://bioinformatics.psb.ugent.be/benelearn2006/",
pdf = "http://www.hutter1.net/ai/ior.pdf",
ps = "http://www.hutter1.net/ai/ior.ps",
latex = "http://www.hutter1.net/ai/ior.zip",
slides = "http://www.hutter1.net/ai/sior.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
press = "http://www.hutter1.net/official/press.htm#ior",
abstract = "A fundamental problem in artificial intelligence is that nobody really
knows what intelligence is. The problem is especially acute when we
need to consider artificial systems which are significantly different
to humans. In this paper we approach this problem in the following
way: We take a number of well known informal definitions of human
intelligence that have been given by experts, and extract their
essential features. These are then mathematically formalised to
produce a general measure of intelligence for arbitrary machines. We
believe that this measure formally captures the concept of machine
intelligence in the broadest reasonable sense.",
}
@InProceedings{Hutter:06robot,
author = "Viktor Zhumatiy and Faustino Gomez and Marcus Hutter and J{\"u}rgen Schmidhuber",
title = "Metric State Space Reinforcement Learning for a Vision-Capable Mobile Robot",
booktitle = "Proc. 9th International Conf. on Intelligent Autonomous Systems ({IAS'06})",
pages = "272--281",
_editor = "Tamio Arai and Rolf Pfeifer and Tucker Balch and Hiroshi Yokoi",
publisher = "IOS Press",
_month = mar,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#robot",
url = "http://arxiv.org/abs/cs.RO/0603023",
conf = "http://www.arai.pe.u-tokyo.ac.jp/IAS-9/",
pdf = "http://www.hutter1.net/ai/robot.pdf",
ps = "http://www.hutter1.net/ai/robot.ps",
latex = "http://www.hutter1.net/ai/robot.zip",
slides = "http://www.hutter1.net/ai/srobot.pdf",
slidesppt = "http://www.hutter1.net/ai/srobot.ppt",
isbn = "1-58603-595-9",
keywords = "reinforcement learning; mobile robots.",
abstract = "We address the problem of autonomously learning controllers for
vision-capable mobile robots. We extend McCallum's (1995)
Nearest-Sequence Memory algorithm to allow for general metrics
over state-action trajectories. We demonstrate the feasibility of
our approach by successfully running our algorithm on a real
mobile robot. The algorithm is novel and unique in that it (a)
explores the environment and learns directly on a mobile robot
without using a hand-made computer model as an intermediate step,
(b) does not require manual discretization of the sensor input
space, (c) works in piecewise continuous perceptual spaces, and
(d) copes with partial observability. Together this allows
learning from much less experience compared to previous methods.",
znote = "Acceptance rate: 112/146 = 77\%",
}
@Article{Hutter:06knapsack,
author = "Monaldo Mastrolilli and Marcus Hutter",
title = "Hybrid Rounding Techniques for Knapsack Problems",
journal = "Discrete Applied Mathematics",
volume = "154",
number = "4",
pages = "640--649",
_month = mar,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#knapsack",
url = "http://arxiv.org/abs/cs.CC/0305002",
pdf = "http://www.hutter1.net/ai/knapsack.pdf",
ps = "http://www.hutter1.net/ai/knapsack.ps",
latex = "http://www.hutter1.net/ai/knapsack.tex",
project = "http://www.hutter1.net/official/projects.htm#optimize",
issn = "0166-218X",
doi = "10.1016/j.dam.2005.08.004",
abstract = "We address the classical knapsack problem and a variant in which an upper
bound is imposed on the number of items that can be selected. We show that
appropriate combinations of rounding techniques yield novel and powerful
ways of rounding. As an application of these techniques, we present faster
polynomial time approximation schemes that computes an approximate solution
of any fixed accuracy in linear time. This linear complexity bounds give a
substantial improvement of the best previously known polynomial bounds.",
}
@Article{Hutter:06unimdlx,
author = "Marcus Hutter",
title = "Sequential Predictions based on Algorithmic Complexity",
journal = "Journal of Computer and System Sciences",
volume = "72",
number = "1",
pages = "95--117",
_month = feb,
year = "2006",
bibtex = "http://www.hutter1.net/official/bib.htm#unimdlx",
url = "http://arxiv.org/abs/cs.IT/0508043",
pdf = "http://www.hutter1.net/ai/unimdlx.pdf",
ps = "http://www.hutter1.net/ai/unimdlx.ps",
latex = "http://www.hutter1.net/ai/unimdlx.tex",
slides = "http://www.hutter1.net/ai/sunimdl.pdf",
project = "http://www.hutter1.net/official/projects.htm#mdl",
issn = "0022-0000",
doi = "10.1016/j.jcss.2005.07.001",
keywords = "Sequence prediction; Algorithmic Information Theory;
Solomonoff's prior; Monotone Kolmogorov Complexity;
Minimal Description Length; Convergence;
Self-Optimizingness",
abstract = "This paper studies sequence prediction based on the
monotone Kolmogorov complexity $\Km=-\lb m$, i.e.\ based on
universal MDL. $m$ is extremely close to Solomonoff's prior $M$,
the latter being an excellent predictor in deterministic as well
as probabilistic environments, where performance is measured in
terms of convergence of posteriors or losses. Despite this
closeness to $M$, it is difficult to assess the prediction quality
of $m$, since little is known about the closeness of their
posteriors, which are the important quantities for prediction.
We show that for deterministic computable environments, the
``posterior'' and losses of $m$ converge, but rapid convergence
could only be shown on-sequence; the off-sequence behavior is
unclear. In probabilistic environments, neither the posterior nor
the losses converge, in general.",
}
@Proceedings{Hutter:06kcdagabs,
editor = "Marcus Hutter and Wolfgang Merkle and Paul M. B. Vit\'anyi",
title = "Kolmogorov Complexity and Applications",
number = "06051",
_month = jan # "/" # aug,
year = "2006",
series = "Dagstuhl Seminar Proceedings",
url1 = "http://www.hutter1.net/dagstuhl/",
url2 = "http://drops.dagstuhl.de/portals/06051",
url3 = "http://drops.dagstuhl.de/opus/volltexte/2006/663",
pdf = "http://www.hutter1.net/dagstuhl/kcdagabs.pdf",
ps = "http://www.hutter1.net/dagstuhl/kcdagabs.ps",
latex = "http://www.hutter1.net/dagstuhl/kcdagabs.tex",
project = "http://www.hutter1.net/official/projects.htm#ait",
issn = "1862-4405",
publisher = "IBFI",
_publisher = "Internationales Begegnungs- und Forschungszentrum fuer Informatik (IBFI), Schloss Dagstuhl, Germany",
address = "Dagstuhl, Germany",
keywords = "Information theory, Kolmogorov Complexity, effective randomness,
algorithmic probability, recursion theory, computational complexity,
machine learning",
abstract = "From 29.01.06 to 03.02.06,
the Dagstuhl Seminar 06051 ``Kolmogorov Complexity and Applications''
was held in the International Conference and Research Center (IBFI),
Schloss Dagstuhl. During the seminar, several participants presented
their current research, and ongoing work and open problems were
discussed. Abstracts of the presentations given during the seminar
as well as abstracts of seminar results and ideas are put together
in this proceedings. The first section describes the seminar topics and
goals in general. Links to extended abstracts or full papers are
provided, if available.",
note = "http://drops.dagstuhl.de/portals/06051",
}
%-------------Publications-of-Marcus-Hutter-2005--------------%
@Article{Hutter:05mdl2px,
author = "Jan Poland and Marcus Hutter",
title = "Asymptotics of Discrete {MDL} for Online Prediction",
journal = "IEEE Transactions on Information Theory",
_month = nov,
volume = "51",
number = "11",
pages = "3780--3795",
year = "2005",
bibtex = "http://www.hutter1.net/official/bib.htm#mdl2px",
url = "http://arxiv.org/abs/cs.IT/0506022",
pdf = "http://www.hutter1.net/ai/mdl2px.pdf",
ps = "http://www.hutter1.net/ai/mdl2px.ps",
latex = "http://www.hutter1.net/ai/mdl2px.zip",
slides = "http://www.hutter1.net/ai/smdl2p.pdf",
slidesppt = "http://www.hutter1.net/ai/smdl2p.ppt",
project = "http://www.hutter1.net/official/projects.htm#mdl",
doi = "10.1109/TIT.2005.856956",
issn = "0018-9448",
keywords = "Algorithmic Information Theory, Classification, Consistency,
Discrete Model Class, Loss Bounds, Minimum Description Length,
Regression, Sequence Prediction, Stabilization, Universal Induction.",
abstract = "Minimum Description Length (MDL) is an important principle for induction and
prediction, with strong relations to optimal Bayesian learning. This paper
deals with learning non-i.i.d. processes by means of two-part MDL, where the
underlying model class is countable. We consider the online learning framework,
i.e. observations come in one by one, and the predictor is allowed to update
his state of mind after each time step. We identify two ways of predicting by
MDL for this setup, namely a static and a dynamic one. (A third variant,
hybrid MDL, will turn out inferior.) We will prove that under the only
assumption that the data is generated by a distribution contained in the model
class, the MDL predictions converge to the true values almost surely. This is
accomplished by proving finite bounds on the quadratic, the Hellinger, and the
Kullback-Leibler loss of the MDL learner, which are however exponentially worse
than for Bayesian prediction. We demonstrate that these bounds are sharp, even
for model classes containing only Bernoulli distributions. We show how these
bounds imply regret bounds for arbitrary loss functions. Our results apply to a
wide range of setups, namely sequence prediction, pattern classification,
regression, and universal induction in the sense of Algorithmic Information
Theory among others.",
}
@Article{Hutter:05tree,
author = "Marco Zaffalon and Marcus Hutter",
title = "Robust Inference of Trees",
journal = "Annals of Mathematics and Artificial Intelligence",
volume = "45",
pages = "215--239",
_month = oct,
year = "2005",
_publisher = "Springer",
bibtex = "http://www.hutter1.net/official/bib.htm#tree",
url = "http://arxiv.org/abs/cs.LG/0511087",
pdf = "http://www.hutter1.net/ai/tree.pdf",
ps = "http://www.hutter1.net/ai/tree.ps",
latex = "http://www.hutter1.net/ai/tree.zip",
project = "http://www.hutter1.net/official/projects.htm#robust",
doi = "10.1007/s10472-005-9007-9",
issn = "1012-2443",
categories = "I.2. [Artificial Intelligence]",
keywords = "Robust inference, spanning trees, intervals,
dependence, graphical models, mutual information, imprecise
probabilities, imprecise Dirichlet model.",
abstract = "This paper is concerned with the reliable inference of optimal
tree-approximations to the dependency structure of an unknown
distribution generating data. The traditional approach to the
problem measures the dependency strength between random variables
by the index called mutual information. In this paper reliability
is achieved by Walley's imprecise Dirichlet model, which
generalizes Bayesian learning with Dirichlet priors. Adopting the
imprecise Dirichlet model results in posterior interval
expectation for mutual information, and in a set of plausible
trees consistent with the data. Reliable inference about the
actual tree is achieved by focusing on the substructure common to
all the plausible trees. We develop an exact algorithm that infers
the substructure in time O(m^4), m being the number of random
variables. The new algorithm is applied to a set of data sampled
from a known distribution. The method is shown to reliably infer
edges of the actual tree even when the data are very scarce,
unlike the traditional approach. Finally, we provide lower and
upper credibility limits for mutual information under the
imprecise Dirichlet model. These enable the previous developments
to be extended to a full inferential method for trees.",
}
@InProceedings{Hutter:05postbnd,
author = "Alexey Chernov and Marcus Hutter",
title = "Monotone Conditional Complexity Bounds on Future Prediction Errors",
booktitle = "Proc. 16th International Conf. on Algorithmic Learning Theory ({ALT'05})",
address = "Singapore",
series = "LNAI",
volume = "3734",
_editor = "Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita",
publisher = "Springer",
pages = "414--428",
_month = oct,
year = "2005",
bibtex = "http://www.hutter1.net/official/bib.htm#postbnd",
url = "http://arxiv.org/abs/cs.LG/0507041",
pdf = "http://www.hutter1.net/ai/postbnd.pdf",
ps = "http://www.hutter1.net/ai/postbnd.ps",
latex = "http://www.hutter1.net/ai/postbnd.tex",
slides = "http://www.hutter1.net/ai/spostbnd.pdf",
project = "http://www.hutter1.net/official/projects.htm#ait",
doi = "10.1007/11564089_32",
issn = "0302-9743",
isbn = "3-540-29242-X",
keywords = "Kolmogorov complexity, posterior bounds,
online sequential prediction, Solomonoff prior,
monotone conditional complexity, total error,
future loss, randomness deficiency.",
abstract = "We bound the future loss when predicting any (computably)
stochastic sequence online. Solomonoff finitely bounded the total
deviation of his universal predictor M from the true
distribution m by the algorithmic complexity of m. Here we
assume we are at a time t>1 and already observed x=x_1...x_t.
We bound the future prediction performance on x_{t+1}x_{t+2}...
by a new variant of algorithmic complexity of m given x,
plus the complexity of the randomness deficiency of x. The new
complexity is monotone in its condition in the sense that this
complexity can only decrease if the condition is prolonged. We
also briefly discuss potential generalizations to Bayesian model
classes and to classification problems.",
support = "SNF grant 200020-100259 and 2100-67712",
znote = "Acceptance rate: 30/98 = 30\%",
}
@InProceedings{Hutter:05actexp2,
author = "Jan Poland and Marcus Hutter",
title = "Defensive Universal Learning with Experts",
booktitle = "Proc. 16th International Conf. on Algorithmic Learning Theory ({ALT'05})",
address = "Singapore",
series = "LNAI",
volume = "3734",
_editor = "Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita",
publisher = "Springer",
_month = oct,
pages = "356--370",
year = "2005",
bibtex = "http://www.hutter1.net/official/bib.htm#actexp2",
url = "http://arxiv.org/abs/cs.LG/0507044",
pdf = "http://www.hutter1.net/ai/actexp2.pdf",
ps = "http://www.hutter1.net/ai/actexp2.ps",
latex = "http://www.hutter1.net/ai/actexp2.tex",
slides = "http://www.hutter1.net/ai/sactexp.pdf",
slidesppt = "http://www.hutter1.net/ai/sactexp.ppt",
project = "http://www.hutter1.net/official/projects.htm#expert",
doi = "10.1007/11564089_28",
issn = "0302-9743",
isbn = "3-540-29242-X",
keywords = "Prediction with expert advice, responsive
environments, partial observation game, bandits, universal
learning, asymptotic optimality.",
abstract = "This paper shows how universal learning can be achieved with
expert advice. To this aim, we specify an experts algorithm with
the following characteristics: (a) it uses only feedback from the
actions actually chosen (bandit setup), (b) it can be applied with
countably infinite expert classes, and (c) it copes with losses
that may grow in time appropriately slowly. We prove loss bounds
against an adaptive adversary. From this, we obtain a master
algorithm for ``reactive'' experts problems, which means that the
master's actions may influence the behavior of the adversary. Our
algorithm can significantly outperform standard experts algorithms
on such problems. Finally, we combine it with a universal expert
class. The resulting universal learner performs -- in a certain
sense -- almost as well as any computable strategy, for any online
decision problem. We also specify the (worst-case) convergence
speed, which is very slow.",
znote = "Acceptance rate: 30/98 = 30\%",
}
@InProceedings{Hutter:05iors,
author = "Shane Legg and Marcus Hutter",
title = "A Universal Measure of Intelligence for Artificial Agents",
booktitle =    "Proc. 19th International Joint Conf. on Artificial Intelligence ({IJCAI-2005})",
pages = "1509--1510",
_editor = "L. P. Kaelbling and A. Saffiotti",
_publisher = "Professional Book Center",
address =      "Edinburgh, Scotland",
_month = aug,
year = "2005",
bibtex = "http://www.hutter1.net/official/bib.htm#iors",
http = "http://dl.acm.org/citation.cfm?id=1642293.1642533",
pdf = "http://www.hutter1.net/ai/iors.pdf",
ps = "http://www.hutter1.net/ai/iors.ps",
slides = "http://www.hutter1.net/ai/siors.pdf",
project = "http://www.hutter1.net/official/projects.htm#uai",
press = "http://www.hutter1.net/official/press.htm#ior",
isbn_print = "0-938075-93-4",
isbn_cd = "0-938075-94-2",
support = "SNF grant 2100-67712",
znote = "Acceptance rate: 112/453 = 25\%",
}
@InProceedings{Hutter:05fuds,
author = "Shane Legg and Marcus Hutter",
title = "Fitness Uniform Deletion for Robust Optimization",
booktitle = "Proc. Genetic and Evolutionary Computation Conference ({GECCO'05})",
address =      "Washington, DC, USA",
editor = "H.-G. Beyer et al.",
publisher = "ACM SigEvo",
_month = jun,
year = "2005",
pages = "1271--1278",
bibtex = "http://www.hutter1.net/official/bib.htm#fuds",
http = "http://www.hutter1.net/ai/fuds.htm",
url = "http://arxiv.org/abs/cs.NE/0504035",
pdf = "http://www.hutter1.net/ai/fuds.pdf",
ps = "http://www.hutter1.net/ai/fuds.ps",
latex = "http://www.hutter1.net/ai/fuds.zip",
slides = "http://www.hutter1.net/ai/sfuds.pdf",
slidesppt = "http://www.hutter1.net/ai/sfuds.ppt",
project = "http://www.hutter1.net/official/projects.htm#optimize",
press = "http://www.hutter1.net/official/press.htm#fuss",
code1 = "http://www.hutter1.net/ai/fussdd.cpp",
code2 = "http://www.hutter1.net/ai/fussdd.h",
code3 = "http://www.hutter1.net/ai/fusstsp.cpp",
code4 = "http://www.hutter1.net/ai/fusstsp.h",
doi = "10.1145/1068009.1068216",
isbn = "1-59593-010-8",
keywords = "Evolutionary algorithm, deletion schemes, fitness evaluation,
optimization, fitness landscapes, (self)adaptation.",
abstract = "A commonly experienced problem with population based optimisation
methods is the gradual decline in population diversity that tends
to occur over time. This can slow a system's progress or even
halt it completely if the population converges on a local optimum
from which it cannot escape. In this paper we present the Fitness
Uniform Deletion Scheme (FUDS), a simple but somewhat
unconventional approach to this problem. Under FUDS the deletion
operation is modified to only delete those individuals which are
``common'' in the sense that there exist many other individuals of
similar fitness in the population. This makes it impossible for
the population to collapse to a collection of highly related
individuals with similar fitness. Our experimental results on a
range of optimisation problems confirm this, in particular for
deceptive optimisation problems the performance is significantly
more robust to variation in the selection intensity.",
znote = "Acceptance rate: 253/549 = 46\%",
}
@Article{Hutter:05expertx,
author = "Marcus Hutter and Jan Poland",
title = "Adaptive Online Prediction by Following the Perturbed Leader",
volume = "6",
_month = apr,
year = "2005",
pages = "639--660",
journal = "Journal of Machine Learning Research",
publisher = "Microtome",
bibtex = "http://www.hutter1.net/official/bib.htm#expertx",
http = "http://www.hutter1.net/ai/expertx.htm",
url = "http://arxiv.org/abs/cs.AI/0504078",
url2 = "http://www.jmlr.org/papers/v6/hutter05a.html",
pdf = "http://www.hutter1.net/ai/expertx.pdf",
ps = "http://www.hutter1.net/ai/expertx.ps",
latex = "http://www.hutter1.net/ai/expertx.tex",
slides = "http://www.hutter1.net/ai/sexpert.pdf",
project = "http://www.hutter1.net/official/projects.htm#expert",
issn = "1532-4435",
keywords = "Prediction with Expert Advice, Follow the Perturbed Leader,
general weights, adaptive learning rate,
adaptive adversary, hierarchy of experts,
expected and high probability bounds, general alphabet and loss,
online sequential prediction.",
abstract = "When applying aggregating strategies to Prediction with Expert
Advice, the learning rate must be adaptively tuned. The natural
choice of sqrt(complexity/current loss) renders the analysis of
Weighted Majority derivatives quite complicated. In particular,
for arbitrary weights there have been no results proven so far.
The analysis of the alternative ``Follow the Perturbed Leader''
(FPL) algorithm from Kalai & Vempala (2003) (based on Hannan's
algorithm) is easier. We derive loss bounds for adaptive learning
rate and both finite expert classes with uniform weights and
countable expert classes with arbitrary weights. For the former
setup, our loss bounds match the best known results so far, while
for the latter our results are new.",
}
@Article{Hutter:05mifs,
author = "Marcus Hutter and Marco Zaffalon",
title = "Distribution of Mutual Information from Complete and Incomplete Data",
journal = "Computational Statistics \& Data Analysis",
volume = "48",
number = "3",
pages = "633--657",
_month = mar,
year = "2005",
publisher = "Elsevier Science",
bibtex = "http://www.hutter1.net/official/bib.htm#mifs",
http = "http://www.hutter1.net/ai/mifs.htm",
url = "http://arxiv.org/abs/cs.LG/0403025",
pdf = "http://www.hutter1.net/ai/mifs.pdf",
ps = "http://www.hutter1.net/ai/mifs.ps",
latex = "http://www.hutter1.net/ai/mifs.zip",
slides = "http://www.hutter1.net/ai/smimiss.pdf",
slidesppt = "http://www.hutter1.net/ai/smimiss.ppt",
project = "http://www.hutter1.net/official/projects.htm#robust",
code = "http://www.hutter1.net/ai/mifs.cpp",
doi = "10.1016/j.csda.2004.03.010",
issn = "0167-9473",
categories = "I.2. [Artificial Intelligence]",
keywords = "Mutual information, cross entropy, Dirichlet distribution, second
order distribution, expectation and variance of mutual
information, feature selection, filters, naive Bayes classifier,
Bayesian statistics.",
abstract = "Mutual information is widely used, in a descriptive way, to measure the
stochastic dependence of categorical random variables. In order to address
questions such as the reliability of the descriptive value, one must consider
sample-to-population inferential approaches. This paper deals with the
posterior distribution of mutual information, as obtained in a Bayesian
framework by a second-order Dirichlet prior distribution. The exact analytical
expression for the mean, and analytical approximations for the variance,
skewness and kurtosis are derived. These approximations have a guaranteed
accuracy level of the order O(1/n^3), where n is the sample size. Leading order
approximations for the mean and the variance are derived in the case of
incomplete samples. The derived analytical expressions allow the distribution
of mutual information to be approximated reliably and quickly. In fact, the
derived expressions can be computed with the same order of complexity needed
for descriptive mutual information. This makes the distribution of mutual
information become a concrete alternative to descriptive mutual information in
many applications which would benefit from moving to the inductive side. Some
of these prospective applications are discussed, and one of them, namely
feature selection, is shown to perform significantly better when inductive
mutual information is used.",
}
@InProceedings{Hutter:05mdlreg,
author = "Jan Poland and Marcus Hutter",
title = "Strong Asymptotic Assertions for Discrete {MDL} in Regression and Classification",
booktitle = "Proc. 14th {D}utch-{B}elgium Conf. on Machine Learning ({Benelearn'05})",
address = "Enschede",
_editor = "Martijn {van Otterlo} and Mannes Poel and Anton Nijholt",
pages = "67--72",
_month = feb,
year = "2005",
_number = "WP05-03",
_series = "CTIT Workshop Proceedings Series",
_organization = "CTIT Research Institute, University of Twente",
bibtex = "http://www.hutter1.net/official/bib.htm#mdlreg",
url = "http://arxiv.org/abs/math.ST/0502315",
conf = "http://hmi.ewi.utwente.nl/conference/benelearn2005",
pdf = "http://www.hutter1.net/ai/mdlreg.pdf",
ps = "http://www.hutter1.net/ai/mdlreg.ps",
latex = "http://www.hutter1.net/ai/mdlreg.tex",
slides = "http://www.hutter1.net/ai/smdlreg.pdf",
slidesppt = "http://www.hutter1.net/ai/smdlreg.ppt",
project = "http://www.hutter1.net/official/projects.htm#mdl",
issn = "0929-0672",
keywords = "Regression, Classification, Sequence Prediction,
Machine Learning, Minimum Description Length, Bayes Mixture,
Marginalization, Convergence, Discrete Model Classes.",
abstract = "We study the properties of the MDL (or maximum penalized
complexity) estimator for Regression and Classification, where the
underlying model class is countable. We show in particular a
finite bound on the Hellinger losses under the only assumption
that there is a ``true'' model contained in the class. This implies
almost sure convergence of the predictive distribution to the true
one at a fast rate. It corresponds to Solomonoff's central theorem
of universal induction, however with a bound that is exponentially
larger.",
}
@InProceedings{Hutter:05actexp,
author = "Jan Poland and Marcus Hutter",
title = "Master Algorithms for Active Experts Problems based on Increasing Loss Values",
booktitle = "Proc. 14th {D}utch-{B}elgium Conf. on Machine Learning ({Benelearn'05})",
address = "Enschede",
_editor = "Martijn {van Otterlo} and Mannes Poel and Anton Nijholt",
pages = "59--66",
_month = feb,
year = "2005",
_number = "WP05-03",
_series = "CTIT Workshop Proceedings Series",
_organization = "CTIT Research Institute, University of Twente",
bibtex = "http://www.hutter1.net/official/bib.htm#actexp",
url = "http://arxiv.org/abs/cs.LG/0502067",
conf = "http://hmi.ewi.utwente.nl/conference/benelearn2005",
pdf = "http://www.hutter1.net/ai/actexp.pdf",
ps = "http://www.hutter1.net/ai/actexp.ps",
latex = "http://www.hutter1.net/ai/actexp.tex",
slides = "http://www.hutter1.net/ai/sactexp.pdf",
slidesppt = "http://www.hutter1.net/ai/sactexp.ppt",
project = "http://www.hutter1.net/official/projects.htm#expert",
issn = "0929-0672",
keywords = "Prediction with expert advice, responsive
environments, partial observation game, bandits, universal
learning, asymptotic optimality.",
abstract = "We specify an experts algorithm with the following
characteristics: (a) it uses only feedback from the actions
actually chosen (bandit setup), (b) it can be applied with
countably infinite expert classes, and (c) it copes with
losses that may grow in time appropriately slowly. We
prove loss bounds against an adaptive adversary. From this, we
obtain master algorithms for ``active experts problems'', which
means that the master's actions may influence the behavior of
the adversary. Our algorithm can significantly outperform
standard experts algorithms on such problems. Finally, we
combine it with a universal expert class. This results in a
(computationally infeasible) universal master algorithm
which performs - in a certain sense - almost as well as any
computable strategy, for any online problem.",
}
@Slides{Hutter:05predict,
author = "Marcus Hutter",
title = "How to predict with {Bayes}, {MDL}, and {Experts}",
_month = jan,
year = "2005",
note = "Presented at the Machine Learning Summer School (MLSS)",
http = "http://canberra05.mlss.cc/",
url = "http://www.idsia.ch/~marcus/ai/predict.htm",
slides = "http://www.idsia.ch/~marcus/ai/spredict.pdf",
}
@InProceedings{Hutter:05bayestree,
author = "Marcus Hutter",
title = "Fast Non-Parametric {B}ayesian Inference on Infinite Trees",
booktitle = "Proc. 10th International Conf. on Artificial Intelligence and Statistics ({AISTATS-2005})",
_address = "Barbados",
_editor = "R. G. Cowell and Z. Ghahramani",
publisher = "Society for Artificial Intelligence and Statistics",
pages = "144--151",
_month = jan,
year = "2005",
bibtex = "http://www.hutter1.net/official/bib.htm#bayestree",
http = "http://www.hutter1.net/ai/bayestree.htm",
url = "http://arxiv.org/abs/math.PR/0411515",
pdf = "http://www.hutter1.net/ai/bayestree.pdf",
ps = "http://www.hutter1.net/ai/bayestree.ps",
latex = "http://www.hutter1.net/ai/bayestree.zip",
slides = "http://www.hutter1.net/ai/sbayestree.pdf",
project = "http://www.hutter1.net/official/projects.htm#bayes",
code = "http://www.hutter1.net/ai/bayestree.c",
isbn = "0-9727358-1-X",
keywords = "Bayesian density estimation, exact linear time algorithm,
non-parametric inference, adaptive infinite tree, Polya tree,
scale invariance.",
abstract = "Given i.i.d. data from an unknown distribution,
we consider the problem of predicting future items.
An adaptive way to estimate the probability density
is to recursively subdivide the domain to an appropriate
data-dependent granularity. A Bayesian would assign a
data-independent prior probability to ``subdivide'', which leads
to a prior over infinite(ly many) trees. We derive an exact, fast,
and simple inference algorithm for such a prior, for the data
evidence, the predictive distribution, the effective model
dimension, and other quantities.",
znote = "Acceptance rate: 57/150 = 38\%",
}
%-------------Publications-of-Marcus-Hutter-2004--------------%
@TechReport{Hutter:04mdp,
author = "Shane Legg and Marcus Hutter",
number = "IDSIA-21-04",
title = "Ergodic {MDP}s Admit Self-Optimising Policies",
year = "2004",
institution = "{IDSIA}",
}
@TechReport{Hutter:04env,
author = "Shane Legg and Marcus Hutter",
number = "IDSIA-20-04",
title = "A Taxonomy for Abstract Environments",
year = "2004",
institution = "{IDSIA}",
}
@Book{Hutter:04uaibook,
author = "Marcus Hutter",
title = "Universal Artificial Intelligence:
Sequential Decisions based on Algorithmic Probability",
_series = "EATCS",
publisher = "Springer",
address = "Berlin",
year = "2005",
isbn = "3-540-22139-5",
isbn-online = "978-3-540-26877-2",
doi = "10.1007/b138233",
note = "300 pages, http://www.hutter1.net/ai/uaibook.htm",
url = "http://www.hutter1.net/ai/uaibook.htm",
review1 = "AIJ: http://dx.doi.org/10.1016/j.artint.2006.10.005",
review2 = "ACM: http://www.reviews.com/review/review_review.cfm?review_id=131175",
reviews = "Amazon: http://www.amazon.com/exec/obidos/redirect?tag=homepageofm0a-20&path=ASIN/3540221395",
slides = "http://www.hutter1.net/ai/suaibook.pdf",
video = "http://vimeo.com/14888930",
keywords = "Artificial intelligence; algorithmic probability;
sequential decision theory; Solomonoff induction;
Kolmogorov complexity; Bayes mixture distributions;
reinforcement learning; universal sequence prediction;
tight loss and error bounds; Levin search;
strategic games; function minimization; supervised learning.",
abstract = "This book presents sequential decision theory from a
novel algorithmic information theory perspective. While the former
theory is suited for active agents in known environments, the
latter is suited for passive prediction of unknown environments.
The book introduces these two well-known but very different ideas
and removes the limitations by unifying them to one parameter-free
theory of an optimal reinforcement learning agent interacting with
an arbitrary unknown world. Most if not all AI problems can easily
be formulated within this theory, which reduces the conceptual
problems to pure computational ones. Considered problem classes
include sequence prediction, strategic games, function
minimization, reinforcement and supervised learning. Formal
definitions of intelligence order relations, the horizon problem
and relations to other approaches to AI are discussed. One
intention of this book is to excite a broader AI audience about
abstract algorithmic information theory concepts, and conversely
to inform theorists about exciting applications to AI.",
support = "SNF grant 2000-61847",
}
@InProceedings{Hutter:04mlconvx,
author = "Marcus Hutter and Andrej A. Muchnik",
title = "Universal Convergence of Semimeasures on Individual Random Sequences",
booktitle = "Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})",
address = "Padova, Italy",
series = "LNAI",
volume = "3244",
_editor = "S. Ben-David and J. Case and A. Maruoka",
publisher = "Springer",
pages = "234--248",
year = "2004",
doi = "10.1007/978-3-540-30215-5_19",
issn = "0302-9743",
isbn = "3-540-23356-3",
http = "http://www.hutter1.net/ai/mlconvx.htm",
url = "http://arxiv.org/abs/cs.LG/0407057",
keywords = "Sequence prediction; Algorithmic Information Theory;
universal enumerable semimeasure; mixture distributions;
posterior convergence; Martin-L{\"o}f randomness;
quasimeasures.",
abstract = "Solomonoff's central result on induction is that the posterior of
a universal semimeasure M converges rapidly and with probability
1 to the true sequence generating posterior mu, if the latter is
computable. Hence, M is eligible as a universal sequence predictor
in case of unknown mu. Despite some nearby results and proofs in
the literature, the stronger result of convergence for all
(Martin-Loef) random sequences remained open. Such a convergence
result would be particularly interesting and natural, since
randomness can be defined in terms of M itself. We show that there
are universal semimeasures M which do not converge for all random
sequences, i.e. we give a partial negative answer to the open
problem. We also provide a positive answer for some non-universal
semimeasures. We define the incomputable measure D as a mixture
over all computable measures and the enumerable semimeasure W as a
mixture over all enumerable nearly-measures. We show that W
converges to D and D to mu on all random sequences. The Hellinger
distance measuring closeness of two distributions plays
a central role.",
znote = "Acceptance rate: 29/91 = 32\%",
}
@InProceedings{Hutter:04expert,
author = "Marcus Hutter and Jan Poland",
title = "Prediction with Expert Advice by Following the Perturbed Leader for General Weights",
booktitle = "Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})",
address = "Padova, Italy",
series = "LNAI",
volume = "3244",
_editor = "S. Ben-David and J. Case and A. Maruoka",
publisher = "Springer",
pages = "279--293",
year = "2004",
doi = "10.1007/978-3-540-30215-5_22",
issn = "0302-9743",
isbn = "3-540-23356-3",
http = "http://www.hutter1.net/ai/expert.htm",
url = "http://arxiv.org/abs/cs.LG/0405043",
keywords = "Prediction with Expert Advice, Follow the Perturbed Leader,
general weights, adaptive learning rate,
hierarchy of experts, expected and high probability bounds,
general alphabet and loss, online sequential prediction.",
abstract = "When applying aggregating strategies to Prediction with Expert
Advice, the learning rate must be adaptively tuned. The natural
choice of sqrt(complexity/current loss) renders the
analysis of Weighted Majority derivatives quite complicated. In
particular, for arbitrary weights there have been no results
proven so far. The analysis of the alternative ``Follow the
Perturbed Leader'' (FPL) algorithm from Kalai \& Vempala (2003) (based on
Hannan's algorithm) is easier. We derive loss bounds for adaptive
learning rate and both finite expert classes with uniform weights
and countable expert classes with arbitrary weights. For the
former setup, our loss bounds match the best known results so far,
while for the latter our results are new.",
znote = "Acceptance rate: 29/91 = 32\%",
}
@InProceedings{Hutter:04mdlspeed,
author = "Jan Poland and Marcus Hutter",
title = "On the convergence speed of {MDL} predictions for {B}ernoulli sequences",
booktitle = "Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})",
address = "Padova, Italy",
series = "LNAI",
volume = "3244",
_editor = "S. Ben-David and J. Case and A. Maruoka",
publisher = "Springer",
pages = "294--308",
year = "2004",
doi = "10.1007/978-3-540-30215-5_23",
issn = "0302-9743",
isbn = "3-540-23356-3",
http = "http://www.hutter1.net/ai/mdlspeed.htm",
url = "http://arxiv.org/abs/cs.LG/0407039",
keywords = "MDL, Minimum Description Length, Convergence Rate,
Prediction, Bernoulli, Discrete Model Class.",
abstract = "We consider the Minimum Description Length principle for online
sequence prediction. If the underlying model class is discrete,
then the total expected square loss is a particularly interesting
performance measure: (a) this quantity is bounded, implying
convergence with probability one, and (b) it additionally
specifies a `rate of convergence'. Generally, for MDL only
exponential loss bounds hold, as opposed to the linear bounds for
a Bayes mixture. We show that this is even the case if the model
class contains only Bernoulli distributions. We derive a new upper
bound on the prediction error for countable Bernoulli classes.
This implies a small bound (comparable to the one for Bayes
mixtures) for certain important model classes. The results apply
to many Machine Learning tasks including classification and
hypothesis testing. We provide arguments that our theorems
generalize to countable classes of i.i.d. models.",
znote = "Acceptance rate: 29/91 = 32\%",
}
@TechReport{Hutter:04bayespea,
author = "Marcus Hutter",
title = "Online Prediction -- {B}ayes versus Experts",
institution = "http://www.idsia.ch/$_{^\sim}$marcus/ai/bayespea.htm",
_month = jul,
pages = "4 pages",
year = "2004",
note = "Presented at the {\em EU PASCAL Workshop on
Learning Theoretic and Bayesian Inductive Principles (LTBIP-2004)}",
url = "http://www.hutter1.net/ai/bayespea.htm",
ps = "http://www.hutter1.net/ai/bayespea.ps",
pdf = "http://www.hutter1.net/ai/bayespea.pdf",
slides = "http://www.hutter1.net/ai/sbayespea.pdf",
keywords = "Bayesian sequence prediction;
Prediction with Expert Advice;
general weights, alphabet and loss.",
abstract = "We derive a very general regret bound in the framework of
prediction with expert advice, which challenges the best known
regret bound for Bayesian sequence prediction. Both bounds of the
form $\sqrt{\mbox{Loss}\times\mbox{complexity}}$ hold for any
bounded loss-function, any prediction and observation spaces,
arbitrary expert/environment classes and weights, and unknown
sequence length.",
}
@InProceedings{Hutter:04mdl2p,
author = "Jan Poland and Marcus Hutter",
title = "Convergence of Discrete {MDL} for Sequential Prediction",
booktitle = "Proc. 17th Annual Conf. on Learning Theory ({COLT'04})",
address = "Banff, Canada",
series = "LNAI",
volume = "3120",
_editor = "J. Shawe-Taylor and Y. Singer",
publisher = "Springer",
pages = "300--314",
year = "2004",
doi = "10.1007/978-3-540-27819-1_21",
isbn = "3-540-22282-0",
http = "http://www.hutter1.net/ai/mdl2p.htm",
url = "http://arxiv.org/abs/cs.LG/0404057",
keywords = "Minimum Description Length, Sequence Prediction,
Convergence, Discrete Model Classes, Universal Induction,
Stabilization, Algorithmic Information Theory.",
abstract = "We study the properties of the Minimum Description Length principle for
sequence prediction, considering a two-part MDL estimator which is chosen from
a countable class of models. This applies in particular to the important case
of universal sequence prediction, where the model class corresponds to all
algorithms for some fixed universal Turing machine (this correspondence is by
enumerable semimeasures, hence the resulting models are stochastic). We prove
convergence theorems similar to Solomonoff's theorem of universal induction,
which also holds for general Bayes mixtures. The bound characterizing the
convergence speed for MDL predictions is exponentially larger as compared to
Bayes mixtures. We observe that there are at least three different ways of
using MDL for prediction. One of these has worse prediction properties, for
which predictions only converge if the MDL estimator stabilizes. We establish
sufficient conditions for this to occur. Finally, some immediate consequences
for complexity relations and randomness criteria are proven.",
znote = "Acceptance rate: 44/107 = 41\%",
}
@InProceedings{Hutter:04fussexp,
author = "Shane Legg and Marcus Hutter and Akshat Kumar",
title = "Tournament versus Fitness Uniform Selection",
booktitle = "Proc. 2004 Congress on Evolutionary Computation ({CEC'04})",
address = "Portland, OR, USA",
xeditor = "??",
publisher = "IEEE",
isbn = "0-7803-8515-2",
_month = jun,
year = "2004",
pages = "2144--2151",
keywords = "Selection schemes, fitness evaluation, optimization,
fitness landscapes, basic working principles of evolutionary computations,
(self)adaptation, evolutionary algorithm,
deceptive \& multimodal optimization problems.",
http = "http://www.hutter1.net/ai/fussexp.htm",
url = "http://arxiv.org/abs/cs.LG/0403038",
doi = "10.1109/CEC.2004.1331162",
press = "http://www.trnmag.com/Stories/032801/Diversity_trumps_fitness_032801.html",
abstract = "In evolutionary algorithms a critical parameter that must be tuned is
that of selection pressure. If it is set too low then the rate of
convergence towards the optimum is likely to be slow. Alternatively
if the selection pressure is set too high the system is likely to
become stuck in a local optimum due to a loss of diversity in the
population. The recent Fitness Uniform Selection Scheme (FUSS) is a
conceptually simple but somewhat radical approach to addressing this
problem --- rather than biasing the selection towards higher fitness,
FUSS biases selection towards sparsely populated fitness levels. In
this paper we compare the relative performance of FUSS with the well
known tournament selection scheme on a range of problems.",
znote = "Acceptance rate: 300/460 = 65\%",
}
%-------------Publications-of-Marcus-Hutter-2003--------------%
@PhDThesis{Hutter:03habil,
author = "Marcus Hutter",
school = "Fakult{\"a}t f{\"u}r Informatik",
address = "TU M{\"u}nchen",
title = "Optimal Sequential Decisions based on Algorithmic Probability",
year = "2003",
pages = "1--288",
http = "http://www.hutter1.net/ai/habil.htm",
url = "http://arxiv.org/abs/cs.AI/0306091",
keywords = "Artificial intelligence; algorithmic probability;
sequential decision theory; Solomonoff induction;
Kolmogorov complexity; Bayes-mixture distributions;
reinforcement learning; universal sequence prediction;
tight loss and error bounds; Levin search;
strategic games; function minimization;
supervised learning.",
abstract = "Decision theory formally solves the problem of rational agents in
uncertain worlds if the true environmental prior probability
distribution is known. Solomonoff's theory of universal induction
formally solves the problem of sequence prediction for unknown
prior distribution. In this \thesis\ both ideas are unified to one
parameter-free theory for universal Artificial Intelligence. We
give strong arguments that the resulting AIXI model is the most
intelligent unbiased agent possible. We outline for a number of
problem classes, including sequence prediction, strategic games,
function minimization, reinforcement and supervised learning, how
the AIXI model can formally solve them. The major drawback of the
AIXI model is that it is uncomputable. To overcome this problem,
we construct a modified algorithm AIXI$tl$, which is still
effectively more intelligent than any other time $t$ and length $l$
bounded agent. The computation time of AIXI$tl$ is of the order
$t\cdot 2^l$. The discussion includes formal definitions of
intelligence order relations, the horizon problem and relations of
the AIXI theory to other AI approaches.",
}
@InProceedings{Hutter:03unimdl,
author = "Marcus Hutter",
title = "Sequence Prediction based on Monotone Complexity",
booktitle = "Proc. 16th Annual Conf. on Learning Theory ({COLT'03})",
address = "Washington, DC, USA",
series = "LNAI",
volume = "2777",
_editor = "B. Sch{\"o}lkopf and M. K. Warmuth",
publisher = "Springer",
pages = "506--521",
year = "2003",
isbn = "3-540-40720-0",
doi = "10.1007/978-3-540-45167-9_37",
http = "http://www.hutter1.net/ai/unimdl.htm",
url = "http://arxiv.org/abs/cs.AI/0306036",
keywords = "Sequence prediction; Algorithmic Information Theory;
Solomonoff's prior; Monotone Kolmogorov Complexity;
Minimal Description Length; Convergence;
Self-Optimizingness",
abstract = "This paper studies sequence prediction based on the
monotone Kolmogorov complexity $\Km=-\lb m$, i.e.\ based on
universal MDL. $m$ is extremely close to Solomonoff's prior $M$,
the latter being an excellent predictor in deterministic as well
as probabilistic environments, where performance is measured in
terms of convergence of posteriors or losses. Despite this
closeness to $M$, it is difficult to assess the prediction quality
of $m$, since little is known about the closeness of their
posteriors, which are the important quantities for prediction.
We show that for deterministic computable environments, the
``posterior'' and losses of $m$ converge, but rapid convergence
could only be shown on-sequence; the off-sequence behavior is
unclear. In probabilistic environments, neither the posterior nor
the losses converge, in general.",
znote = "Acceptance rate: 49/92 = 53\%",
}
@InProceedings{Hutter:03unipriors,
author = "Marcus Hutter",
title = "On the Existence and Convergence of Computable Universal Priors",
booktitle = "Proc. 14th International Conf. on Algorithmic Learning Theory ({ALT'03})",
address = "Sapporo, Japan",
_editor = "Ricard Gavald{\'a} and Klaus P. Jantke and Eiji Takimoto",
series = "LNAI",
volume = "2842",
publisher = "Springer",
pages = "298--312",
_month = sep,
year = "2003",
ISSN = "0302-9743",
isbn = "3-540-20291-9",
doi = "10.1007/978-3-540-39624-6_24",
http = "http://www.hutter1.net/ai/uniprior.htm",
url = "http://arxiv.org/abs/cs.LG/0305052",
keywords = "Sequence prediction; Algorithmic Information Theory;
Solomonoff's prior; universal probability;
mixture distributions; posterior convergence;
computability concepts; Martin-L{\"o}f randomness.",
abstract = "Solomonoff unified Occam's razor and Epicurus' principle
of multiple explanations to one elegant, formal, universal theory
of inductive inference, which initiated the field of algorithmic
information theory. His central result is that the posterior of
his universal semimeasure $M$ converges rapidly to the true
sequence generating posterior $\mu$, if the latter is computable.
Hence, $M$ is eligible as a universal predictor in case of unknown
    $\mu$. We investigate the existence, computability and convergence of
universal (semi)measures for a hierarchy of computability classes:
finitely computable, estimable, (co)enumerable, and approximable.
    For instance, $\MM(x)$ is known to be enumerable, but not finitely
    computable, and to dominate all enumerable semimeasures.
We define seven classes of (semi)measures based on these four
computability concepts. Each class may or may not contain a
    (semi)measure which dominates all elements of another class. The
analysis of these 49 cases can be reduced to four basic cases, two
of them being new. We present proofs for discrete and continuous
semimeasures.
We also investigate more closely the type of convergence, possibly
implied by universality (in difference and in ratio, with probability
1, in mean sum, and for Martin-L{\"o}f random sequences).",
znote = "Acceptance rate: 19/37 = 51\%?",
}
@InProceedings{Hutter:03mlconv,
author = "Marcus Hutter",
title = "An Open Problem Regarding the Convergence
of Universal A Priori Probability",
booktitle = "Proc. 16th Annual Conf. on Learning Theory ({COLT'03})",
address = "Washington, DC, USA",
series = "LNAI",
volume = "2777",
_editor = "B. Sch{\"o}lkopf and M. K. Warmuth",
publisher = "Springer",
pages = "738--740",
year = "2003",
isbn = "3-540-40720-0",
doi = "10.1007/978-3-540-45167-9_58",
url = "http://www.hutter1.net/ai/mlconv.htm",
keywords = "Sequence prediction; Algorithmic Information Theory;
Solomonoff's prior; universal probability;
posterior convergence; Martin-L{\"o}f randomness.",
abstract = "Is the textbook result that Solomonoff's universal
posterior converges to the true posterior for all Martin-L{\"o}f
random sequences true?",
}
% JMLR 4 (2003) 971--1000: optimality properties of Bayes-mixture sequence predictors.
% Fix: month macro lowercased (nov) to match the file-wide convention for _month.
@Article{Hutter:03optisp,
author = "Marcus Hutter",
title = "Optimality of Universal {B}ayesian Prediction for General Loss and Alphabet",
_month = nov,
volume = "4",
year = "2003",
pages = "971--1000",
journal = "Journal of Machine Learning Research",
publisher = "MIT Press",
http = "http://www.hutter1.net/ai/optisp.htm",
url = "http://arxiv.org/abs/cs.LG/0311014",
url2 = "http://www.jmlr.org/papers/volume4/hutter03a/",
url3 = "http://www.jmlr.org/papers/v4/hutter03a.html",
issn = "1532-4435",
keywords = "Bayesian sequence prediction; mixture distributions; Solomonoff
induction; Kolmogorov complexity; learning; universal probability;
tight loss and error bounds; Pareto-optimality; games of chance;
classification.",
abstract = "Various optimality properties of universal sequence predictors
based on Bayes-mixtures in general, and Solomonoff's prediction
scheme in particular, will be studied. The probability of
observing $x_t$ at time $t$, given past observations
$x_1...x_{t-1}$ can be computed with the chain rule if the true
generating distribution $\mu$ of the sequences $x_1x_2x_3...$ is
known. If $\mu$ is unknown, but known to belong to a countable or
continuous class $\M$ one can base ones prediction on the
Bayes-mixture $\xi$ defined as a $w_\nu$-weighted sum or integral
of distributions $\nu\in\M$. The cumulative expected loss of the
Bayes-optimal universal prediction scheme based on $\xi$ is shown
to be close to the loss of the Bayes-optimal, but infeasible
prediction scheme based on $\mu$. We show that the bounds are
tight and that no other predictor can lead to significantly
smaller bounds. Furthermore, for various performance measures, we
show Pareto-optimality of $\xi$ and give an Occam's razor argument
that the choice $w_\nu\sim 2^{-K(\nu)}$ for the weights is
optimal, where $K(\nu)$ is the length of the shortest program
describing $\nu$. The results are applied to games of chance,
defined as a sequence of bets, observations, and rewards. The
prediction schemes (and bounds) are compared to the popular
predictors based on expert advice. Extensions to infinite
alphabets, partial, delayed and probabilistic prediction,
classification, and more active systems are briefly discussed.",
znote = "Inofficial numbers: Acceptance rate: 27\%",
}
% ISIPTA 2003 (Proceedings in Informatics 18), pages 274--289: robust interval
% estimates under Walley's Imprecise Dirichlet Model.
% Fix: missing space in address "Lugano,Switzerland".
@InProceedings{Hutter:03idm,
author = "Marcus Hutter",
title = "Robust Estimators under the {I}mprecise {D}irichlet {M}odel",
booktitle = "Proc. 3rd International Symposium on
Imprecise Probabilities and Their Application ({ISIPTA-2003})",
_editor = "Jean-Marc Bernard and Teddy Seidenfeld and Marco Zaffalon",
publisher = "Carleton Scientific",
series = "Proceedings in Informatics",
volume = "18",
address = "Lugano, Switzerland",
year = "2003",
pages = "274--289",
isbn = "1-894145-17-8",
http = "http://www.hutter1.net/ai/idm.htm",
url = "http://arxiv.org/abs/math.PR/0305121",
keywords = "Imprecise Dirichlet Model; exact, conservative, approximate,
robust, confidence interval estimates; entropy; mutual information.",
abstract = "Walley's Imprecise Dirichlet Model (IDM) for categorical data
overcomes several fundamental problems which other approaches to
uncertainty suffer from. Yet, to be useful in practice, one needs
efficient ways for computing the imprecise=robust sets or
intervals. The main objective of this work is to derive exact,
conservative, and approximate, robust and credible interval
estimates under the IDM for a large class of statistical
estimators, including the entropy and mutual information.",
znote = "Inofficial numbers: Acceptance rate: 44/55 = 80\% ?",
}
% KI 2003 (LNAI 2821), pages 396--406: Bayesian treatment of incomplete discrete
% data applied to mutual information and feature selection.
% Fix: _editor names must be separated by " and ", not a comma, per BibTeX name syntax.
@InProceedings{Hutter:03mimiss,
author = "Marcus Hutter and Marco Zaffalon",
title = "Bayesian Treatment of Incomplete Discrete Data applied
to Mutual Information and Feature Selection",
_month = sep,
year = "2003",
pages = "396--406",
series = "LNAI",
volume = "2821",
booktitle = "Proc. 26th German Conf. on Artificial Intelligence (KI-2003)",
_editor = "A. G{\"u}nter and R. Kruse and B. Neumann",
address = "Hamburg, Germany",
publisher = "Springer",
doi = "10.1007/978-3-540-39451-8_29",
issn = "0302-9743",
isbn = "3-540-00168-9",
http = "http://www.hutter1.net/ai/mimiss.htm",
url = "http://arxiv.org/abs/cs.LG/0306126",
keywords = "Incomplete data, Bayesian statistics, expectation maximization,
global optimization, Mutual Information, Cross Entropy, Dirichlet
distribution, Second order distribution, Credible intervals,
expectation and variance of mutual information, missing data,
Robust feature selection, Filter approach, naive Bayes classifier.",
abstract = "Given the joint chances of a pair of random variables one can
compute quantities of interest, like the mutual information. The
Bayesian treatment of unknown chances involves computing, from a
second order prior distribution and the data likelihood, a
posterior distribution of the chances. A common treatment of
incomplete data is to assume ignorability and determine the
chances by the expectation maximization (EM) algorithm. The two
different methods above are well established but typically
separated. This paper joins the two approaches in the case of
Dirichlet priors, and derives efficient approximations for the
mean, mode and the (co)variance of the chances and the mutual
information. Furthermore, we prove the unimodality of the
posterior distribution, whence the important property of
convergence of EM to the global maximum in the chosen framework.
These results are applied to the problem of selecting features for
incremental learning and naive Bayes classification. A fast filter
based on the distribution of mutual information is shown to
outperform the traditional filter based on empirical mutual
information on a number of incomplete real data sets.",
znote = "Acceptance rate: 42/90 = 46\%",
}
% IEEE Trans. Information Theory 49(8):2061--2067, 2003: convergence and loss
% bounds for Bayesian sequence prediction.
% Fix: missing space in "Manno(Lugano)". NOTE(review): 'address' in an @Article
% appears to hold the author's affiliation, not a publisher address -- confirm intent.
@Article{Hutter:03spupper,
author = "Marcus Hutter",
title = "Convergence and Loss Bounds for {Bayesian} Sequence Prediction",
_month = aug,
volume = "49",
number = "8",
year = "2003",
pages = "2061--2067",
address = "Manno (Lugano), Switzerland",
journal = "IEEE Transactions on Information Theory",
doi = "10.1109/TIT.2003.814488",
issn = "0018-9448",
http = "http://www.hutter1.net/ai/spupper.htm",
url = "http://arxiv.org/abs/cs.LG/0301014",
keywords = "Bayesian sequence prediction;
general loss function and bounds;
convergence; mixture distributions.",
abstract = "The probability of observing $x_t$ at time $t$, given past
observations $x_1...x_{t-1}$ can be computed with Bayes rule if
the true generating distribution $\mu$ of the sequences
$x_1x_2x_3...$ is known. If $\mu$ is unknown, but known to belong
to a class $M$ one can base ones prediction on the Bayes mix
$\xi$ defined as a weighted sum of distributions $\nu\in M$.
Various convergence results of the mixture posterior $\xi_t$ to
the true posterior $\mu_t$ are presented. In particular a new
(elementary) derivation of the convergence $\xi_t/\mu_t\to 1$ is
provided, which additionally gives the rate of convergence. A
general sequence predictor is allowed to choose an action $y_t$
based on $x_1...x_{t-1}$ and receives loss $\ell_{x_t y_t}$ if
$x_t$ is the next symbol of the sequence. No assumptions are made
on the structure of $\ell$ (apart from being bounded) and $M$.
The Bayes-optimal prediction scheme $\Lambda_\xi$ based on mixture
$\xi$ and the Bayes-optimal informed prediction scheme
$\Lambda_\mu$ are defined and the total loss $L_\xi$ of
$\Lambda_\xi$ is bounded in terms of the total loss $L_\mu$ of
$\Lambda_\mu$. It is shown that $L_\xi$ is bounded for bounded
$L_\mu$ and $L_\xi/L_\mu\to 1$ for $L_\mu\to \infty$. Convergence
of the instantaneous losses is also proven.",
}
%-------------Publications-of-Marcus-Hutter-2002--------------%
% UAI 2002, pages 577--584: robust feature selection via the distribution of
% mutual information (with Zaffalon).
% Fix: publisher's city moved from the publisher field into its own address field.
@InProceedings{Hutter:02feature,
author = "Marco Zaffalon and Marcus Hutter",
title = "Robust Feature Selection by Mutual Information Distributions",
_month = jun,
year = "2002",
pages = "577--584",
booktitle = "Proc. 18th International Conf. on
Uncertainty in Artificial Intelligence (UAI-2002)",
_editor = "A. Darwiche and N. Friedman",
publisher = "Morgan Kaufmann",
address = "San Francisco, CA",
isbn = "1-55860-897-4",
http = "http://www.hutter1.net/ai/feature.htm",
url = "http://arxiv.org/abs/cs.AI/0206006",
categories = "I.2. [Artificial Intelligence]",
keywords = "Robust feature selection, Filter approach, naive Bayes classifier,
Mutual Information, Cross Entropy, Dirichlet distribution, Second
order distribution, Bayesian statistics, Credible intervals,
expectation and variance of mutual information, missing data.",
abstract = "Mutual information is widely used in artificial intelligence, in a
descriptive way, to measure the stochastic dependence of discrete random
variables. In order to address questions such as the reliability of the
empirical value, one must consider sample-to-population inferential
approaches. This paper deals with the distribution of mutual information, as
obtained in a Bayesian framework by a second-order Dirichlet prior
distribution. The exact analytical expression for the mean and an
analytical approximation of the variance are reported. Asymptotic
approximations of the distribution are proposed. The results are applied to
the problem of selecting features for incremental learning and
classification of the naive Bayes classifier. A fast, newly defined method
is shown to outperform the traditional approach based on empirical mutual
information on a number of real data sets. Finally, a theoretical
development is reported that allows one to efficiently extend the above
methods to incomplete samples in an easy and effective way.",
znote = "Acceptance rate: 66/192 = 34\%",
}
% COLT 2002 (LNAI 2375), pages 364--379: self-optimizing and Pareto-optimal
% Bayes-mixture policies in general (non-Markov) environments.
@InProceedings{Hutter:02selfopt,
author = "Marcus Hutter",
title = "Self-Optimizing and {P}areto-Optimal Policies in
General Environments based on {B}ayes-Mixtures",
_month = jul,
series = "LNAI",
volume = "2375",
year = "2002",
pages = "364--379",
address = "Sydney, Australia",
booktitle = "Proc. 15th Annual Conf. on Computational Learning Theory ({COLT'02})",
_editor = "J. Kivinen and R. H. Sloan",
publisher = "Springer",
isbn = "978-3-540-43836-6",
doi = "10.1007/3-540-45435-7_25",
http = "http://www.hutter1.net/ai/selfopt.htm",
url = "http://arxiv.org/abs/cs.AI/0204040",
keywords = "Rational agents, sequential decision theory,
reinforcement learning, value function, Bayes mixtures,
self-optimizing policies, Pareto-optimality,
unbounded effective horizon, (non) Markov decision
processes.",
abstract = "The problem of making sequential decisions in unknown
probabilistic environments is studied. In cycle $t$ action $y_t$
results in perception $x_t$ and reward $r_t$, where all quantities
in general may depend on the complete history. The perception
$x_t'$ and reward $r_t$ are sampled from the (reactive)
environmental probability distribution $\mu$. This very general
setting includes, but is not limited to, (partial observable, k-th
order) Markov decision processes. Sequential decision theory tells
us how to act in order to maximize the total expected reward,
called value, if $\mu$ is known. Reinforcement learning is usually
used if $\mu$ is unknown. In the Bayesian approach one defines a
mixture distribution $\xi$ as a weighted sum of distributions
$\nu\in\M$, where $\M$ is any class of distributions including the
true environment $\mu$. We show that the Bayes-optimal policy
$p^\xi$ based on the mixture $\xi$ is self-optimizing in the sense
that the average value converges asymptotically for all $\mu\in\M$
to the optimal value achieved by the (infeasible) Bayes-optimal
policy $p^\mu$ which knows $\mu$ in advance. We show that the
necessary condition that $\M$ admits self-optimizing policies at
all, is also sufficient. No other structural assumptions are made
on $\M$. As an example application, we discuss ergodic Markov
decision processes, which allow for self-optimizing policies.
Furthermore, we show that $p^\xi$ is Pareto-optimal in the sense
that there is no other policy yielding higher or equal value in
{\em all} environments $\nu\in\M$ and a strictly higher value in
at least one.",
znote = "Acceptance rate: 26/55 = 47\%",
}
% NIPS 14 (proceedings published 2002; citation key carries the 2001 conference
% year), pages 399--406: Bayesian distribution of mutual information.
@InProceedings{Hutter:01xentropy,
author = "Marcus Hutter",
title = "Distribution of Mutual Information",
_month = dec,
booktitle = "Advances in Neural Information Processing Systems 14",
_editor = "T. G. Dietterich and S. Becker and Z. Ghahramani",
publisher = "MIT Press",
address = "Cambridge, MA, USA",
pages = "399--406",
year = "2002",
isbn = "0262042088",
http = "http://www.hutter1.net/ai/xentropy.htm",
url = "http://arxiv.org/abs/cs.AI/0112019",
categories = "I.2. [Artificial Intelligence]",
keywords = "Mutual Information, Cross Entropy, Dirichlet distribution, Second
order distribution, expectation and variance of mutual
information.",
abstract = "The mutual information of two random variables i and j with joint
probabilities t_ij is commonly used in learning Bayesian nets as
well as in many other fields. The chances t_ij are usually
estimated by the empirical sampling frequency n_ij/n leading to a
point estimate I(n_ij/n) for the mutual information. To answer
questions like ``is I(n_ij/n) consistent with zero?'' or ``what is
the probability that the true mutual information is much larger
than the point estimate?'' one has to go beyond the point estimate.
In the Bayesian framework one can answer these questions by
utilizing a (second order) prior distribution p(t) comprising
prior information about t. From the prior p(t) one can compute the
posterior p(t|n), from which the distribution p(I|n) of the mutual
information can be calculated. We derive reliable and quickly
computable approximations for p(I|n). We concentrate on the mean,
variance, skewness, and kurtosis, and non-informative priors. For
the mean we also give an exact expression. Numerical issues and
the range of validity are discussed.",
znote = "Acceptance rate: 196/660 = 30\%",
}
% CEC 2002, pages 783--788: fitness uniform selection scheme to preserve
% genetic diversity in evolutionary algorithms.
@InProceedings{Hutter:02fuss,
author = "Marcus Hutter",
title = "Fitness Uniform Selection to Preserve Genetic Diversity",
booktitle = "Proc. 2002 Congress on Evolutionary Computation (CEC-2002)",
address = "Honolulu, HI, USA",
publisher = "IEEE",
ISSN = "1098-7576",
_month = may,
year = "2002",
pages = "783--788",
keywords = "Evolutionary algorithms, fitness uniform selection strategy,
preserve diversity, local optima, evolution,
correlated recombination, crossover.",
http = "http://www.hutter1.net/ai/pfuss.htm",
url = "http://arxiv.org/abs/cs.AI/0103015",
abstract = "In evolutionary algorithms, the fitness of a population increases
with time by mutating and recombining individuals and by a biased
selection of more fit individuals. The right selection pressure is
critical in ensuring sufficient optimization progress on the one
hand and in preserving genetic diversity to be able to escape from
local optima on the other. We propose a new selection scheme,
which is uniform in the fitness values. It generates selection
pressure towards sparsely populated fitness regions, not
necessarily towards higher fitness, as is the case for all other
selection schemes. We show that the new selection scheme can be
much more effective than standard selection schemes.",
znote = "Acceptance rate: 264/372 = 71\%",
}
% IJFCS 13(3):431--443, 2002: the fastest-and-shortest-algorithm result
% (factor-5 optimality via proof enumeration).
@Article{Hutter:02fast,
author = "Marcus Hutter",
title = "The Fastest and Shortest Algorithm for All Well-Defined Problems",
journal = "International Journal of Foundations of Computer Science",
publisher = "World Scientific",
volume = "13",
number = "3",
pages = "431--443",
year = "2002",
keywords = "Acceleration, Computational Complexity,
Algorithmic Information Theory, Kolmogorov Complexity, Blum's
Speed-up Theorem, Levin Search.",
http = "http://www.hutter1.net/ai/pfastprg.htm",
url = "http://arxiv.org/abs/cs.CC/0206022",
abstract = "An algorithm M is described that solves any well-defined problem
p as quickly as the fastest algorithm computing a solution to
p, save for a factor of 5 and low-order additive terms. M
optimally distributes resources between the execution of provably
correct p-solving programs and an enumeration of all proofs,
including relevant proofs of program correctness and of time
bounds on program runtimes. M avoids Blum's speed-up theorem by
ignoring programs without correctness proof. M has broader
applicability and can be faster than Levin's universal search, the
fastest method for inverting functions save for a large
multiplicative constant. An extension of Kolmogorov complexity and
two novel natural measures of function complexity are used to show
that the most efficient program computing some function f is
also among the shortest programs provably computing f.",
press = "http://guide.supereva.it/c_/interventi/2001/04/38469.shtml",
}
% US patent US2002041701 (BrainLAB, 2002).
% Fix: the patent number and page range were stuffed into the volume field;
% they now live in the standard number and pages fields.
@Article{Hutter:02uspatent,
author = "Marcus Hutter",
title = "System and method for analysing and displaying two- or three-dimensional sets of data",
number = "US2002041701",
pages = "1--15",
journal = "{\rm BrainLAB}, US patent",
year = "2002",
url = "http://l2.espacenet.com/espacenet/bnsviewer?CY=ep&LG=en&DB=EPD&PN=US2002041701&ID=US2002041701A1+I+",
}
%-------------Publications-of-Marcus-Hutter-2001--------------%
% EU patent EP1184812 (BrainLAB, 2001); German title.
% Fix: the patent number and page range were stuffed into the volume field;
% they now live in the standard number and pages fields.
@Article{Hutter:01eupatent,
author = "Marcus Hutter",
title = "{S}tufenfreie {D}arstellung von zwei- oder dreidimensionalen Datens{\"a}tzen durch kr{\"u}mmungsminimierende {V}erschiebung von {P}ixelwerten",
number = "EP1184812",
pages = "1--19",
journal = "{\rm BrainLAB}, EU patent",
year = "2001",
url = "http://l2.espacenet.com/espacenet/bnsviewer?CY=ep&LG=en&DB=EPD&PN=EP1184812&ID=EP+++1184812A1+I+",
}
% ICANN 2001 (LNCS 2130), pages 865--873: market-based RL in POMDPs (with Kwee
% and Schmidhuber).
% Fix: ICANN 2001 co-editor's surname is "Bischof", not "Bishof".
@InProceedings{Hutter:01market,
author = "Ivo Kwee and Marcus Hutter and J{\"u}rgen Schmidhuber",
title = "Market-Based Reinforcement Learning in Partially Observable Worlds",
address = "Vienna",
_month = aug,
year = "2001",
pages = "865--873",
booktitle = "Proc. International Conf. on Artificial Neural Networks (ICANN-2001)",
_journal = "Artificial Neural Networks (ICANN-2001)",
_editor = "Georg Dorffner and Horst Bischof and Kurt Hornik",
publisher = "Springer",
series = "LNCS",
volume = "2130",
http = "http://www.hutter1.net/ai/pmarket.htm",
url = "http://arxiv.org/abs/cs.AI/0105025",
categories = "I.2. [Artificial Intelligence]",
keywords = "Hayek system; reinforcement learning; partial observable environment",
abstract = "Unlike traditional reinforcement learning (RL), market-based
RL is in principle applicable to worlds described by partially
observable Markov Decision Processes (POMDPs), where an agent needs
to learn short-term memories of relevant previous events in order to
execute optimal actions. Most previous work, however, has focused
on reactive settings (MDPs) instead of POMDPs. Here we reimplement
a recent approach to market-based RL and for the first time evaluate
it in a toy POMDP setting.",
znote = "Acceptance rate: 171/300 = 57\%",
}
% ICML 2001, pages 210--217: general loss bounds for universal sequence prediction.
% Fix: keyword typo "leaning" -> "learning".
@InProceedings{Hutter:01loss,
author = "Marcus Hutter",
title = "General Loss Bounds for Universal Sequence Prediction",
year = "2001",
pages = "210--217",
booktitle = "Proc. 18th International Conf. on Machine Learning (ICML-2001)",
address = "Williamstown, MA",
_editor = "Carla. E. Brodley and Andrea Pohoreckyj Danyluk",
publisher = "Morgan Kaufmann",
isbn = "1-55860-778-1",
ISSN = "1049-1910",
http = "http://www.hutter1.net/ai/ploss.htm",
url = "http://arxiv.org/abs/cs.AI/0101019",
categories = "I.2. [Artificial Intelligence],
I.2.6. [Learning],
I.2.8. [Problem Solving, Control Methods and Search],
F.1.3. [Complexity Classes].",
keywords = "Bayesian and deterministic prediction; general loss function;
Solomonoff induction; Kolmogorov complexity; learning; universal
probability; loss bounds; games of chance; partial and delayed
prediction; classification.",
abstract = "The Bayesian framework is ideally suited for induction problems.
The probability of observing $x_k$ at time $k$, given past
observations $x_1...x_{k-1}$ can be computed with Bayes rule if
the true distribution $\mu$ of the sequences $x_1x_2x_3...$ is
known. The problem, however, is that in many cases one does not
even have a reasonable estimate of the true distribution. In order
to overcome this problem a universal distribution $\xi$ is defined
as a weighted sum of distributions $\mu_i\in M$, where $M$ is
any countable set of distributions including $\mu$. This is a
generalization of Solomonoff induction, in which $M$ is the set of
all enumerable semi-measures. Systems which predict $y_k$, given
$x_1...x_{k-1}$ and which receive loss $l_{x_k y_k}$ if $x_k$ is
the true next symbol of the sequence are considered. It is proven
that using the universal $\xi$ as a prior is nearly as good as
using the unknown true distribution $\mu$. Furthermore, games of
chance, defined as a sequence of bets, observations, and rewards
are studied. The time needed to reach the winning zone is
estimated. Extensions to arbitrary alphabets, partial and delayed
prediction, and more active systems are discussed.",
znote = "Acceptance rate: 80/249 = 32\%",
}
% ECML 2001 (LNAI 2167), pages 239--250: universal prediction for nonbinary
% alphabets.
% Fixes: keyword typo "leaning" -> "learning"; title case "Error bounds" -> "Error Bounds".
@InProceedings{Hutter:01alpha,
author = "Marcus Hutter",
title = "Convergence and Error Bounds for Universal Prediction of Nonbinary Sequences",
booktitle = "Proc. 12th European Conf. on Machine Learning (ECML-2001)",
address = "Freiburg, Germany",
_editor = "Luc De Raedt and Peter Flach",
publisher = "Springer",
series = "LNAI",
volume = "2167",
isbn = "3-540-42536-5",
_month = dec,
year = "2001",
pages = "239--250",
http = "http://www.hutter1.net/ai/palpha.htm",
url = "http://arxiv.org/abs/cs.LG/0106036",
keywords = "Induction; Solomonoff, Bayesian, deterministic
prediction; Kolmogorov complexity; learning; Loss function;
algorithmic information theory; universal probability",
abstract = "Solomonoff's uncomputable universal prediction scheme $\xi$ allows
to predict the next symbol $x_k$ of a sequence $x_1...x_{k-1}$ for
any Turing computable, but otherwise unknown, probabilistic
environment $\mu$. This scheme will be generalized to arbitrary
environmental classes, which, among others, allows the
construction of computable universal prediction schemes $\xi$.
Convergence of $\xi$ to $\mu$ in a conditional mean squared sense
and with $\mu$ probability $1$ is proven. It is shown that the
average number of prediction errors made by the universal $\xi$
scheme rapidly converges to those made by the best possible
informed $\mu$ scheme. The schemes, theorems and proofs are given
for general finite alphabet, which results in additional
complications as compared to the binary case.
Several extensions of the presented theory and
results are outlined. They include general loss functions and
bounds, games of chance, infinite alphabet, partial and delayed
prediction, classification, and more active
systems.",
znote = "Acceptance rate: 90/240 = 37\% (includes PKDD)",
}
% EWRL-5 2001, pages 27--29: gradient-based reinforcement planning (GREP).
% Fix: keyword typo "gradient decent" -> "gradient descent".
@InProceedings{Hutter:01grep,
author = "Ivo Kwee and Marcus Hutter and J{\"u}rgen Schmidhuber",
title = "Gradient-based Reinforcement Planning in Policy-Search Methods",
year = "2001",
pages = "27--29",
address = "Utrecht, The Netherlands",
booktitle = "Proc. 5th European Workshop on Reinforcement Learning (EWRL-5)",
volume = "27",
_editor = "Marco A. Wiering",
publisher = "Onderwijsinsituut CKI, Utrecht Univ.",
_series = "Cognitieve Kunstmatige Intelligentie",
isbn = "90-393-2874-9",
ISSN = "1389-5184",
keywords = "Artificial intelligence, reinforcement learning, direct policy search,
planning, gradient descent.",
http = "http://www.hutter1.net/ai/pgrep.htm",
url = "http://arxiv.org/abs/cs.AI/0111060",
categories = "I.2. [Artificial Intelligence],
I.2.6. [Learning],
I.2.8. [Problem Solving, Control Methods and Search]",
abstract = "We introduce a learning method called ``gradient-based reinforcement
planning'' (GREP). Unlike traditional DP methods that improve their
policy backwards in time, GREP is a gradient-based method that plans
ahead and improves its policy {\em before} it actually acts in the
environment. We derive formulas for the exact policy gradient that
maximizes the expected future reward and confirm our ideas
with numerical experiments.",
}
% EWRL-5 2001, pages 25--26: two-page introduction to the AIXI model.
% Fix: grammar in abstract2 "behaves optimal" -> "behaves optimally"
% (matches the wording of the companion entry Hutter:01aixi).
@InProceedings{Hutter:01decision,
author = "Marcus Hutter",
title = "Universal Sequential Decisions in Unknown Environments",
year = "2001",
pages = "25--26",
address = "Utrecht, The Netherlands",
booktitle = "Proc. 5th European Workshop on Reinforcement Learning (EWRL-5)",
volume = "27",
_editor = "Marco A. Wiering",
publisher = "Onderwijsinsituut CKI, Utrecht Univ.",
_series = "Cognitieve Kunstmatige Intelligentie",
isbn = "90-393-2874-9",
ISSN = "1389-5184",
keywords = "Artificial intelligence, Rational agents,
sequential decision theory, universal Solomonoff induction,
algorithmic probability, reinforcement learning, computational
complexity, Kolmogorov complexity.",
url = "http://www.hutter1.net/ai/pdecision.htm",
categories = "I.2. [Artificial Intelligence],
I.2.6. [Learning],
I.2.8. [Problem Solving, Control Methods and Search],
F.1.3. [Complexity Classes],
F.2. [Analysis of Algorithms and Problem Complexity]",
abstract = "We give a brief introduction to the AIXI model, which unifies and
overcomes the limitations of sequential decision theory and
universal Solomonoff induction. While the former theory is suited
for active agents in known environments, the latter is suited for
passive prediction of unknown environments.",
abstract2 = "Decision theory formally solves the problem of rational agents in
uncertain worlds if the true environmental probability
distribution is known. Solomonoff's theory of universal induction
formally solves the problem of sequence prediction for unknown
distribution. We unify both theories and give strong arguments
that the resulting universal AIXI model behaves optimally in any
computable environment.",
}
% ECML 2001 (LNAI 2167), pages 226--238: the AIXI model and its time/space
% bounded variant AIXI^tl.
@InProceedings{Hutter:01aixi,
author = "Marcus Hutter",
title = "Towards a Universal Theory of Artificial Intelligence based on Algorithmic
Probability and Sequential Decisions",
year = "2001",
pages = "226--238",
booktitle = "Proc. 12th European Conf. on
Machine Learning (ECML-2001)",
address = "Freiburg, Germany",
_editor = "Luc De Raedt and Peter Flach",
publisher = "Springer",
series = "LNAI",
volume = "2167",
isbn = "3-540-42536-5",
keywords = "Artificial intelligence, Rational agents,
sequential decision theory, universal Solomonoff induction,
algorithmic probability, reinforcement learning, computational
complexity, theorem proving, probabilistic reasoning, Kolmogorov
complexity, Levin search.",
http = "http://www.hutter1.net/ai/paixi.htm",
url = "http://arxiv.org/abs/cs.AI/0012011",
categories = "I.2. [Artificial Intelligence],
I.2.3. [Deduction and Theorem Proving],
I.2.6. [Learning],
I.2.8. [Problem Solving, Control Methods and Search],
F.1.3. [Complexity Classes],
F.2. [Analysis of Algorithms and Problem Complexity]",
abstract = "Decision theory formally solves the problem of rational agents in
uncertain worlds if the true environmental probability
distribution is known. Solomonoff's theory of universal induction
formally solves the problem of sequence prediction for unknown
distribution. We unify both theories and give strong arguments
that the resulting universal AIXI model behaves optimally in any
computable environment. The major drawback of the AIXI model is
that it is uncomputable. To overcome this problem, we construct a
modified algorithm AIXI^tl, which is still superior to any
other time t and space l bounded agent. The computation time
of AIXI^tl is of the order t x 2^l.",
znote = "Acceptance rate: 90/240 = 37\% (includes PKDD)",
}
% JCSS 62(4):653--667, 2001: new error bounds for Solomonoff prediction.
% Fix: missing space in "Manno(Lugano)". NOTE(review): 'address' in an @Article
% appears to hold the author's affiliation, not a publisher address -- confirm intent.
@Article{Hutter:01errbnd,
author = "Marcus Hutter",
title = "New Error Bounds for {Solomonoff} Prediction",
year = "2001",
volume = "62",
number = "4",
pages = "653--667",
journal = "Journal of Computer and System Sciences",
address = "Manno (Lugano), Switzerland",
keywords = "Kolmogorov Complexity, Solomonoff Prediction, Error
Bound, Induction, Learning, Algorithmic Information
Theory, Bayes",
http = "http://www.hutter1.net/ai/perrbnd.htm",
url = "http://arxiv.org/abs/cs.AI/9912008",
abstract = "Several new relations between Solomonoff prediction
and Bayesian prediction and general probabilistic
prediction schemes will be proved. Among others they
show that the number of errors in Solomonoff prediction
is finite for computable prior probability, if finite
in the Bayesian case. Deterministic variants will also
be studied. The most interesting result is that the
deterministic variant of Solomonoff prediction is
optimal compared to any other probabilistic or
deterministic prediction scheme apart from additive
square root corrections only. This makes it well suited
even for difficult prediction problems, where it does
not suffice when the number of errors is minimal to
within some factor greater than one. Solomonoff's
original bound and the ones presented here complement
each other in a useful way.",
}
%-------------Publications-of-Marcus-Hutter-2000--------------%
% TAI-2001 workshop presentation (informal venue record kept in 'journal').
% Fix: 'year' contained "10 pages, 2001"; the year field must hold only the
% four-digit year, so the page count moved to 'note'.
@Article{Hutter:00speed,
author = "Marcus Hutter",
title = "An effective Procedure for Speeding up Algorithms",
year = "2001",
note = "10 pages",
journal = "Presented at the 3rd Workshop on Algorithmic Information Theory (TAI-2001)",
http = "http://www.hutter1.net/ai/pspeed.htm",
url = "http://arxiv.org/abs/cs.CC/0102018",
keywords = "Acceleration, Computational Complexity,
Algorithmic Information Theory, Blum's Speed-up, Levin Search.",
abstract = "The provably asymptotically fastest algorithm within a factor of 5
for formally described problems will be constructed. The main idea
is to enumerate all programs provably equivalent to the original
problem by enumerating all proofs. The algorithm could be
interpreted as a generalization and improvement of Levin search,
which is, within a multiplicative constant, the fastest algorithm
for inverting functions. Blum's speed-up theorem is avoided by
taking into account only programs for which a correctness proof
exists. Furthermore, it is shown that the fastest program that
computes a certain function is also one of the shortest programs
provably computing this function. To quantify this statement, the
definition of Kolmogorov complexity is extended, and two new
natural measures for the complexity of a function are defined.",
}
% Tech report cs.AI/0004001 (2000), 62 pages: the original AIXI report.
% Fix: the page count was embedded in 'institution'; it now lives in 'note'
% (which already duplicated the arXiv URL from the 'url' field).
@TechReport{Hutter:00kcunai,
author = "Marcus Hutter",
title = "A Theory of Universal Artificial Intelligence based on Algorithmic Complexity",
number = "cs.AI/0004001",
_month = apr,
year = "2000",
institution = "M{\"u}nchen",
keywords = "Artificial intelligence, algorithmic complexity,
sequential decision theory; induction; Solomonoff; Kolmogorov;
Bayes; reinforcement learning; universal sequence prediction;
strategic games; function minimization; supervised learning.",
url = "http://arxiv.org/abs/cs.AI/0004001",
http = "http://www.hutter1.net/ai/pkcunai.htm",
abstract = "Decision theory formally solves the problem of rational agents in
uncertain worlds if the true environmental prior probability
distribution is known. Solomonoff's theory of universal induction
formally solves the problem of sequence prediction for unknown
prior distribution. We combine both ideas and get a parameterless
theory of universal Artificial Intelligence. We give strong
arguments that the resulting AIXI model is the most intelligent
unbiased agent possible. We outline for a number of problem
classes, including sequence prediction, strategic games, function
minimization, reinforcement and supervised learning, how the
AIXI model can formally solve them. The major drawback of the
AIXI model is that it is uncomputable. To overcome this
problem, we construct a modified algorithm AIXI-tl, which is
still effectively more intelligent than any other time t and
space l bounded agent. The computation time of AIXI-tl
is of the order tx2^l. Other discussed topics are formal
definitions of intelligence order relations, the horizon problem
and relations of the AIXI theory to other AI approaches.",
note = "62 pages. http://arxiv.org/abs/cs.AI/0004001",
}
%----------Publications-of-Marcus-Hutter-1987-1999------------%
@Article{Hutter:97instanto,
author = "Marcus Hutter",
title = "Instantons and Meson Correlators in {QCD}",
year = "1997",
pages = "131--143",
journal = "Zeitschrift f{\"u}r Physik C Particles and Fields",
volume = "74",
number = "1",
issn = "0170-9739",
doi = "10.1007/s002880050376",
url = "http://arxiv.org/abs/hep-ph/9501245",
http = "http://www.hutter1.net/physics/pinstant.htm",
abstract = "Various QCD correlators are calculated in the instanton liquid model
 in zeromode approximation and $1/N_c$ expansion. Previous works are
 extended by including dynamical quark loops. In contrast to the
 original ``perturbative'' $1/N_c$ expansion not all quark loops are
 suppressed. In the flavor singlet meson correlators a chain of quark
 bubbles survives the $N_c\to\infty$ limit causing a massive
 $\eta^\prime$ in the pseudoscalar correlator while keeping massless
 pions in the triplet correlator. The correlators are plotted and
 meson masses and couplings are obtained from a spectral fit. They
 are compared to the values obtained from numerical studies of the
 instanton liquid and to experimental results.",
}
@Article{Hutter:97family,
author = "Andreas Blumhofer and Marcus Hutter",
title = "Family Structure from Periodic Solutions of an Improved Gap Equation",
journal = "Nuclear Physics B",
volume = "484",
year = "1997",
pages = "80--96",
doi = "10.1016/S0550-3213(96)00644-X",
issn = "0550-3213",
url = "http://arxiv.org/abs/hep-ph/9605393",
http = "http://www.hutter1.net/physics/pfamily.htm",
abstract = "Fermion mass models usually contain a horizontal symmetry and
 therefore fail to predict the exponential mass spectrum of the Standard
 Model in a natural way. In dynamical symmetry breaking there are
 different concepts to introduce a fermion mass spectrum, which
 automatically has the desired hierarchy. In constructing a specific
 model we show that in some modified gap equations periodic solutions
 with several fermion poles appear. The stability of these excitations
 and the application of this toy model are discussed. The mass ratios
 turn out to be approximately $e^{\pi}$ and $e^{2\pi}$. Thus the model explains
 the large ratios of fermion masses between successive generations in
 the Standard Model without introducing large or small numbers by hand.",
note = "Missing figures in B494 (1997) 485",
}
@PhdThesis{Hutter:96thesis,
author = "Marcus Hutter",
school = "Faculty for Theoretical Physics, LMU Munich",
title = "Instantons in {QCD}: Theory and application of the instanton liquid model",
year = "1996",
pages = "1--100",
url = "http://arxiv.org/abs/hep-ph/0107098",
http = "http://www.hutter1.net/physics/pdise.htm",
abstract = "Numerical and analytical studies of the instanton liquid model have
 allowed the determination of many hadronic parameters during the
 last 13 years. Most part of this thesis is devoted to the extension
 of the analytical methods. The meson correlation (polarization)
 functions are calculated in the instanton liquid model including
 dynamical quark loops. The correlators are plotted and masses and
 couplings of the sigma, rho, omega, a1 and f1 are obtained from a
 spectral fit. A separated analysis allows the determination of the
 eta' mass too. The results agree with the experimental values on
 a 10% level. Further I give some predictions for the proton form
 factors, which are related to the proton spin (problem). A gauge
 invariant gluon mass for small momenta is also calculated. At the
 end of the work some predictions are given, which do not rely on
 the instanton liquid model. A gauge invariant quark propagator is
 calculated in the one instanton background and is compared to the
 regular and singular propagator. An introduction to the skill of
 choosing a suitable gauge, especially a criterion for choosing regular
 or singular gauge, is given. An application is the derivation of a
 finite relation between the quark condensate and the QCD scale Lambda,
 where neither an infrared cutoff nor a specific instanton model has
 been used. In general the instanton liquid model exhibits an astonishing
 internal consistency and a good agreement with the experimental data.",
note = "Translated from the German original http://www.hutter1.net/physics/pdiss.htm",
}
@PhdThesis{Hutter:96diss,
author = "Marcus Hutter",
school = "Fakult{\"a}t f{\"u}r Theoretische Physik, LMU M{\"u}nchen",
title = "Instantonen in der {QCD}: Theorie und Anwendungen des Instanton-Fl{\"u}ssigkeit-Modells",
year = "1996",
pages = "1--105",
url = "http://arxiv.org/abs/hep-ph/9603280",
http = "http://www.hutter1.net/physics/pdiss.htm",
abstract = "Durch numerische Simulation des Instanton-Flüssigkeit-Modells
 konnten eine Reihe hadronischer Größen in den letzten 13 Jahren
 bestimmt werden. Der größte Teil dieser Arbeit ist der Erweiterung
 der analytischen Methoden gewidmet. Die Meson-Korrelatoren
 (auch Polarisations-Funktionen genannt) werden im Instanton-Flüssigkeits-Modell
 berechnet, wobei dynamische Quark-Schleifen berücksichtigt werden.
 Die Korrelatoren werden grafisch dargestellt und die Massen und Kopplungen
 der sigma, rho, omega, a1 und f1 Mesonen werden mit Hilfe eines spektralen
 Fits bestimmt. Eine gesonderte Betrachtung ermöglicht auch die Berechnung
 der eta' Masse. Die Ergebnisse stimmen auf 10% Niveau mit den experimentellen
 Werten überein. Weiterhin wird versucht, die axialen Formfaktoren des Protons
 zu bestimmen. Diese stehen in Zusammenhang mit dem Proton-Spin(-Problem).
 Eine eichinvariante Gluon-Masse wird für kleine Impulse berechnet.
 Die Arbeit wird abgeschlossen mit einigen Vorhersagen, die sich nicht
 speziell auf das Instanton-Flüssigkeits-Modell stützen. Im
 ein-Instanton-Vakuum wird ein eichinvarianter Quark-Propagator berechnet
 und mit dem regulären und dem singulären Propagator verglichen.
 Kriterien für die Wahl einer geeigneten Eichung, insbesondere für die
 Wahl der singulären oder der regulären Eichung, werden gegeben.
 Eine Anwendung ist die Herleitung einer endlichen Relation zwischen
 dem Quark-Kondensat und der QCD-Skala Lambda, wobei weder ein
 Infrarot-Cutoff noch ein spezifisches Instanton-Modell verwendet werden.
 Allgemein weist das Instanton-Flüssigkeits-Modell eine erstaunliche interne
 Konsistenz und gute Übereinstimmung mit experimentellen Daten auf.",
note = "English translation available at http://www.hutter1.net/physics/pdise.htm",
}
@Article{Hutter:96eta,
author = "Marcus Hutter",
title = "The mass of the $\eta'$ in self-dual {QCD}",
year = "1996",
pages = "275--278",
journal = "Physics Letters B",
volume = "367",
issn = "0370-2693",
doi = "10.1016/0370-2693(95)01411-X",
url = "http://arxiv.org/abs/hep-ph/9509401",
http = "http://www.hutter1.net/physics/petamas.htm",
abstract = "The QCD gauge field is modeled as an ensemble of statistically
 independent selfdual and antiselfdual regions. This model is
 motivated from instanton physics. The scale anomaly then allows
 to relate the topological susceptibility to the gluon condensate.
 With the help of Witten's formula for m_eta' and an estimate of
 the suppression of the gluon condensate due to light quarks the
 mass of the eta' can be related to f_pi and the physical gluon
 condensate. We get the quite satisfactory value m_eta'=884+-116 MeV.
 Using the physical eta' mass as an input it is in principle possible
 to get information about the interaction between instantons and
 anti-instantons.",
}
@TechReport{Hutter:95spin,
  author      = {Marcus Hutter},
  title       = {Proton Spin in the Instanton Background},
  number      = {LMU-95-15},
  institution = {Theoretische Physik, LMU M{\"u}nchen},
  year        = {1995},
  url         = {http://arxiv.org/abs/hep-ph/9509402},
  http        = {http://www.hutter1.net/physics/pspin.htm},
  abstract    = {The proton form factors are reduced to vacuum correlators
                 of 4 quark fields by assuming independent constituent
                 quarks. The axial singlet quark and gluonic form factors
                 are calculated in the instanton liquid model. A discussion
                 of gauge(in)dependence is given.},
  note        = {15 pages},
}
@TechReport{Hutter:95prop,
author = "Marcus Hutter",
number = "LMU-95-03",
institution = "Theoretische Physik, LMU M{\"u}nchen",
title = "Gauge Invariant Quark Propagator in the Instanton Background",
year = "1995",
url = "http://arxiv.org/abs/hep-ph/9502361",
http = "http://www.hutter1.net/physics/pprop.htm",
abstract = "After a general discussion on the choice of gauge, we compare
 the quark propagator in the background of one instanton in
 regular and singular gauge with a gauge invariant propagator
 obtained by inserting a path-ordered gluon exponential.
 Using a gauge motivated by this analysis, we were able to
 obtain a finite result for the quark condensate without
 introducing an infrared cutoff or invoking some instanton
 model.",
note = "15 pages",
}
@TechReport{Hutter:93gluon,
  author      = {Marcus Hutter},
  title       = {Gluon Mass from Instantons},
  number      = {LMU-93-18},
  institution = {Theoretische Physik, LMU M{\"u}nchen},
  year        = {1993},
  url         = {http://arxiv.org/abs/hep-ph/9501335},
  http        = {http://www.hutter1.net/physics/pgluon.htm},
  abstract    = {The gluon propagator is calculated in the instanton background
                 in a form appropriate for extracting the momentum dependent
                 gluon mass. In background-xi-gauge we get for the mass 400 MeV
                 for small p^2 independent of the gauge parameter xi.},
  note        = {13 pages},
}
@MastersThesis{Hutter:91cfs,
author = "Marcus Hutter",
school = "Theoretische Informatik, TU M{\"u}nchen",
title = "{I}mplementierung eines {K}lassifizierungs-{S}ystems",
year = "1991",
url = "http://www.hutter1.net/ai/pcfs.htm",
ps = "http://www.hutter1.net/ai/pcfs.ps",
pdf = "http://www.hutter1.net/ai/pcfs.pdf",
code = "http://www.hutter1.net/ai/cfssim.c",
codex = "http://www.hutter1.net/ai/cfsexmpl.c",
abstract = "A classifier system is a massively parallel rule based system,
 whose components (classifier) can exchange messages, whose behavior
 is assessed by a teacher (reinforcement), and which is able to learn by
 means of credit assignment and a genetic algorithm. For an introduction
 we have to refer to the, meanwhile extensive, literature; see especially
 Goldberg (1989). The concept of a classifier system was first developed
 by Holland (1986), but meanwhile a multitude of variants and extensions
 exist (Booker et al., 1989). So far it is impossible to
 compare these variants in their performance, statements on the
 quality of the various approaches are, hence, hard or impossible.
 The program developed in this diploma thesis allows, for the first time,
 a direct comparison of the most important variants.
 The thesis describes the program, in which we have paid special attention
 to an efficient implementation.",
zusammenfassung = "Ein Klassifizierungssystem (CFS, engl. Classifiersystem) ist
 ein massiv paralleles regelbasiertes System, dessen Komponenten
 (Classifier) Nachrichten (Messages) austauschen können, dessen
 Verhalten von einem Lehrer beurteilt wird (Reinforcement) und
 das mittels Credit-Assignment und genetischen Algorithmen fähig
 ist zu lernen. Für eine einführende Darstellung muß auf die
 inzwischen sehr umfangreiche Literatur, insbesondere Goldberg (1989),
 verwiesen werden. Das Konzept des CFS wurde zuerst von Holland (1986)
 entwickelt, inzwischen gibt es aber eine Vielzahl von Varianten und
 Erweiterungen (Booker et al., 1989). Bisher ist es nicht möglich,
 diese Varianten in ihrer Performance zu vergleichen, eine Aussage
 über die Güte der verschiedenen Ansätze ist somit kaum oder
 überhaupt nicht möglich. Das in dieser Diplomarbeit erstellte
 Programm gestattet erstmals bzgl. der wichtigsten Varianten einen
 direkten Vergleich. In den folgenden Kapiteln wird dieses Programm,
 bei dem besonders auf eine effiziente Implementierung geachtet wurde,
 beschrieben.",
note = "72 pages with C listing, in German",
}
@TechReport{Hutter:90faka,
  author      = {Marcus Hutter},
  title       = {{P}arallele {A}lgorithmen in der {S}tr{\"o}mungsmechanik},
  type        = {{F}erienakademie: {N}umerische {M}ethoden der {S}tr{\"o}mungsmechanik},
  institution = {Universit{\"a}t Erlangen-N{\"u}rnberg \&
                 Technische Universit{\"a}t M{\"u}nchen},
  year        = {1990},
  url         = {http://www.hutter1.net/official/faka.htm},
  note        = {10 pages, in German},
}
@TechReport{Hutter:90fopra,
author = "Marcus Hutter",
institution = "Theoretische Informatik, TU M{\"u}nchen",
title = "A Reinforcement Learning {H}ebb Net",
year = "1990",
type = "Fortgeschrittenenpraktikum",
url = "http://www.hutter1.net/ai/fopra.htm",
ftp = "http://www.hutter1.net/ai/fopra.ps.zip",
pdf = "http://www.hutter1.net/ai/fopra.pdf",
code = "http://www.hutter1.net/ai/fopra.pas",
abstract = "This Fopra is motivated by the following observations about
 human learning and about human neural information processing.
 On the one hand humans are able to learn supervised, unsupervised
 and by reinforcement, on the other hand there is no neural
 distinction between informative, uninformative and evaluative
 feedback. Furthermore, the Hebb learning rule is the only
 biologically inspired learning mechanism. If the human brain
 is indeed a Hebb net this would imply that Hebb nets are
 able to learn by reinforcement. The goal of this Fopra is
 to investigate whether and how Hebb nets could be used for
 reinforcement learning. It is shown that Hebb nets with a
 suitable prior net topology can indeed learn, at least
 simple tasks, by reinforcement.",
note = "30 pages with Pascal listing, in German",
}
@Article{Hutter:87cad,
author = "Marcus Hutter",
title = "Fantastische {3D-Graphik} mit dem {CPC-Giga-CAD}",
journal = "7. Schneider Sonderheft, Happy Computer, Sonderheft 16",
publisher = "Markt\&Technik",
year = "1987",
pages = "41--92",
url = "http://www.hutter1.net/gigacad/gigacad.htm",
abstract = "CAD steht für Computer Aided Design. Bis heute war dieses
 Gebiet hauptsächlich Domäne der Großrechner.
 Mit $\gg$CPC-Giga-CAD$\ll$ wird auch auf dem Schneider CPC
 automatisiertes und computergestütztes Zeichnen und
 Konstruieren zum Kinderspiel.",
}
% © 2000 by Marcus Hutter