Commit a385d65f authored by Valentin Reis's avatar Valentin Reis
Browse files

Merge branch 'develop' into 'master'

Namespace switch (HBandit -> Bandit)

See merge request !4
parents edb0fca8 b2d25f83
Pipeline #10325 passed with stages
in 1 minute and 56 seconds
......@@ -42,6 +42,13 @@ nix:package:
script: nix-build -A hbandit --no-build-output
make:readme:
stage: build
tags:
- kvm
- nix
script: nix-shell -p gnumake --run "make README.md"
make:doc:
stage: build
tags:
......
......@@ -80,7 +80,7 @@ ghcid-test: hbandit.cabal .hlint.yaml hbandit.nix
'
.PHONY: pre-commit
pre-commit: ormolu dhall-format shellcheck
pre-commit: ormolu dhall-format shellcheck src/Bandit/Tutorial.hs README.md
.PHONY: shellcheck
shellcheck:
......@@ -91,7 +91,7 @@ shellcheck:
'
.PHONY: hlint
dhall-format:
hlint:
@nix-shell --pure -p hlint --run bash <<< '
hlint src/ --hint=./.hlint.yaml
'
......@@ -123,7 +123,7 @@ ormolu:
}
' --run bash <<< '
RETURN=0
for F in $$(fd -E src/HBandit/Tutorial.hs -e hs); do
for F in $$(fd -E src/Bandit/Tutorial.hs -e hs); do
ormolu -o -XTypeApplications -o -XPatternSynonyms -m check $$F
if [ $$? -ne 0 ]; then
echo "[!] $$F does not pass ormolu format check. Formatting.." >&2
......@@ -135,7 +135,7 @@ ormolu:
'
.PHONY: doc
doc: src/HBandit/Tutorial.hs hbandit.cabal hbandit.nix
doc: src/Bandit/Tutorial.hs hbandit.cabal hbandit.nix
@nix-shell -E '
with import <nixpkgs> {};
with haskellPackages;
......@@ -147,8 +147,8 @@ doc: src/HBandit/Tutorial.hs hbandit.cabal hbandit.nix
cabal v2-haddock hbandit --haddock-internal --builddir=.build
'
.PRECIOUS: src/HBandit/Tutorial.hs
src/HBandit/Tutorial.hs: literate/tutorial.md hbandit.nix src
.PRECIOUS: src/Bandit/Tutorial.hs
src/Bandit/Tutorial.hs: literate/tutorial.md hbandit.nix src
@nix-shell --pure -E '
with import <nixpkgs> {};
with haskellPackages;
......@@ -176,6 +176,7 @@ src/HBandit/Tutorial.hs: literate/tutorial.md hbandit.nix src
aeson
pretty-simple
panhandle
pandoc-citeproc
panpipe
unlit
pandoc
......@@ -188,12 +189,35 @@ src/HBandit/Tutorial.hs: literate/tutorial.md hbandit.nix src
pandoc --filter $$(which panpipe) --filter $$(which panhandle) -f markdown+lhs -t markdown+lhs $< | unlit -f bird > $@
'
README.md: literate/readme.md
@nix-shell --pure -E '
with import <nixpkgs> {};
with haskellPackages;
mkShell {
name="pandoc-tools";
buildInputs = [
inline-r
aeson
pretty-simple
panhandle
pandoc-citeproc
panpipe
pandoc
pkgs.which
cabal-install
R];
R_LIBS_SITE = "$${builtins.readFile r-libs-site}";
}
' --run bash <<< '
pandoc -t markdown_strict --filter $$(which pandoc-citeproc) -s $< -o $@
'
.PHONY:clean
clean:
rm -rf .build
rm -rf dist*
rm -f literate/main.hs
rm -f src/HBandit/Tutorial.hs
rm -f src/Bandit/Tutorial.hs
rm -f hbandit.nix
rm -f hbandit.cabal
rm -rf dhall-to-cabal
# hbandit
hbandit
=======
Safe Multi-Armed Bandit implementations: EXP3, BwCR (wip), Exp4.R
Safe multi-armed bandit implementations:
- Eps-Greedy (fixed rate)
- EXP3 (hyperparameter-free rate from \[[1](#ref-bubeck2012regret)\])
- Exp4.R \[[2](#ref-sun2017safety)\]
documentation
-------------
nix-build /path/to/hbandit/or/url/to/tarball -A hbandit.doc
<!-- vim: set ft=markdown.pandoc cole=0: -->
\[1\] Bubeck, S. et al. 2012. Regret analysis of stochastic and
nonstochastic multi-armed bandit problems. *Foundations and Trends in
Machine Learning*. 5, 1 (2012), 1–122.
\[2\] Sun, W. et al. 2017. Safety-aware algorithms for adversarial
contextual bandit. *Proceedings of the 34th international conference on
machine learning-volume 70* (2017), 3280–3288.
......@@ -24,15 +24,14 @@ source-repository head
library
exposed-modules:
HBandit
HBandit.BwCR
HBandit.Class
HBandit.EpsGreedy
HBandit.Exp3
HBandit.Exp4R
HBandit.Types
HBandit.Tutorial
HBandit.Util
Bandit
Bandit.Class
Bandit.EpsGreedy
Bandit.Exp3
Bandit.Exp4R
Bandit.Types
Bandit.Tutorial
Bandit.Util
hs-source-dirs: src
default-language: Haskell2010
default-extensions: LambdaCase QuasiQuotes DefaultSignatures
......
......@@ -117,15 +117,14 @@ let deps =
}
let allmodules =
[ "HBandit"
, "HBandit.BwCR"
, "HBandit.Class"
, "HBandit.EpsGreedy"
, "HBandit.Exp3"
, "HBandit.Exp4R"
, "HBandit.Types"
, "HBandit.Tutorial"
, "HBandit.Util"
[ "Bandit"
, "Bandit.Class"
, "Bandit.EpsGreedy"
, "Bandit.Exp3"
, "Bandit.Exp4R"
, "Bandit.Types"
, "Bandit.Tutorial"
, "Bandit.Util"
]
let libdep =
......
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" default-locale="en-US">
<info>
<title>ACM SIG Proceedings ("et al." for 3+ authors)</title>
<id>http://www.zotero.org/styles/acm-sig-proceedings</id>
<link href="http://www.zotero.org/styles/acm-sig-proceedings" rel="self"/>
<link href="http://www.acm.org/sigs/publications/proceedings-templates" rel="documentation"/>
<author>
<name>Naeem Esfahani</name>
<email>nesfaha2@gmu.edu</email>
<uri>http://mason.gmu.edu/~nesfaha2/</uri>
</author>
<contributor>
<name>Chris Horn</name>
<email>chris.horn@securedecisions.com</email>
</contributor>
<contributor>
<name>Patrick O'Brien</name>
</contributor>
<category citation-format="numeric"/>
<category field="science"/>
<category field="engineering"/>
<updated>2017-07-15T11:28:14+00:00</updated>
<rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
</info>
<macro name="author">
<choose>
<if type="webpage">
<text variable="title" suffix=":"/>
</if>
<else>
<names variable="author">
<name name-as-sort-order="all" and="text" sort-separator=", " initialize-with="." delimiter-precedes-last="never" delimiter=", "/>
<label form="short" prefix=" "/>
<substitute>
<names variable="editor"/>
<names variable="translator"/>
</substitute>
</names>
</else>
</choose>
</macro>
<macro name="editor">
<names variable="editor">
<name initialize-with="." delimiter=", " and="text"/>
<label form="short" prefix=", "/>
</names>
</macro>
<macro name="access">
<choose>
<if type="article-journal" match="any">
<text variable="DOI" prefix=". DOI:https://doi.org/"/>
</if>
</choose>
</macro>
<citation collapse="citation-number">
<sort>
<key variable="citation-number"/>
</sort>
<layout prefix="[" suffix="]" delimiter=", ">
<text variable="citation-number"/>
</layout>
</citation>
<bibliography entry-spacing="0" second-field-align="flush" et-al-min="3" et-al-use-first="1">
<sort>
<key macro="author"/>
<key variable="title"/>
</sort>
<layout suffix=".">
<text variable="citation-number" prefix="[" suffix="]"/>
<text macro="author" suffix=" "/>
<date variable="issued" suffix=". ">
<date-part name="year"/>
</date>
<choose>
<if type="paper-conference">
<group delimiter=". ">
<text variable="title"/>
<group delimiter=" ">
<text variable="container-title" font-style="italic"/>
<group delimiter=", ">
<group delimiter=", " prefix="(" suffix=")">
<text variable="publisher-place"/>
<date variable="issued">
<date-part name="month" form="short" suffix=" "/>
<date-part name="year"/>
</date>
</group>
<text variable="page"/>
</group>
</group>
</group>
</if>
<else-if type="article-journal">
<group delimiter=". ">
<text variable="title"/>
<text variable="container-title" font-style="italic"/>
<group delimiter=", ">
<text variable="volume"/>
<group delimiter=" ">
<text variable="issue"/>
<date variable="issued" prefix="(" suffix=")">
<date-part name="month" form="short" suffix=" "/>
<date-part name="year"/>
</date>
</group>
<text variable="page"/>
</group>
</group>
</else-if>
<else-if type="patent">
<group delimiter=". ">
<text variable="title"/>
<text variable="number"/>
<date variable="issued">
<date-part name="month" form="short" suffix=" "/>
<date-part name="day" suffix=", "/>
<date-part name="year"/>
</date>
</group>
</else-if>
<else-if type="thesis">
<group delimiter=". ">
<text variable="title" font-style="italic"/>
<text variable="archive_location" prefix="Doctoral Thesis #"/>
<text variable="publisher"/>
</group>
</else-if>
<else-if type="report">
<group delimiter=". ">
<text variable="title" font-style="italic"/>
<text variable="number" prefix="Technical Report #"/>
<text variable="publisher"/>
</group>
</else-if>
<else-if type="webpage">
<group delimiter=". ">
<text variable="URL" font-style="italic"/>
<date variable="accessed" prefix="Accessed: ">
<date-part name="year" suffix="-"/>
<date-part name="month" form="numeric-leading-zeros" suffix="-"/>
<date-part name="day" form="numeric-leading-zeros"/>
</date>
</group>
</else-if>
<else-if type="chapter paper-conference" match="any">
<group delimiter=". ">
<text variable="title"/>
<text variable="container-title" font-style="italic"/>
<text macro="editor"/>
<text variable="publisher"/>
<text variable="page"/>
</group>
</else-if>
<else-if type="bill book graphic legal_case legislation motion_picture report song" match="any">
<group delimiter=". ">
<text variable="title" font-style="italic"/>
<text variable="publisher"/>
</group>
</else-if>
<else>
<group delimiter=". ">
<text variable="title"/>
<text variable="container-title" font-style="italic"/>
<text variable="publisher"/>
</group>
</else>
</choose>
<text macro="access"/>
</layout>
</bibliography>
</style>
---
bibliography: literate/refs.bib
csl: literate/acm.csl
link-citations: true
---
# hbandit
Safe multi-armed bandit implementations:
- Eps-Greedy (fixed rate)
- EXP3 (hyperparameter-free rate from @bubeck2012regret)
- Exp4.R @sun2017safety
## documentation
```
nix-build /path/to/hbandit/or/url/to/tarball -A hbandit.doc
```
<!-- vim: set ft=markdown.pandoc cole=0: -->
@article{bubeck2012regret,
title={Regret analysis of stochastic and nonstochastic multi-armed bandit problems},
author={Bubeck, S{\'e}bastien and Cesa-Bianchi, Nicolo and others},
journal={Foundations and Trends{\textregistered} in Machine Learning},
volume={5},
number={1},
pages={1--122},
year={2012},
publisher={Now Publishers, Inc.}
}
@inproceedings{sun2017safety,
title={Safety-aware algorithms for adversarial contextual bandit},
author={Sun, Wen and Dey, Debadeepta and Kapoor, Ashish},
booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70},
pages={3280--3288},
year={2017},
organization={JMLR. org}
}
literate/regretPlot.png

69.1 KB | W: | H:

literate/regretPlot.png

66.6 KB | W: | H:

literate/regretPlot.png
literate/regretPlot.png
literate/regretPlot.png
literate/regretPlot.png
  • 2-up
  • Swipe
  • Onion skin
literate/summaryPlot.png

7.89 KB | W: | H:

literate/summaryPlot.png

7.64 KB | W: | H:

literate/summaryPlot.png
literate/summaryPlot.png
literate/summaryPlot.png
literate/summaryPlot.png
  • 2-up
  • Swipe
  • Onion skin
String manipulation, the bane
The string-manipulating bash snippets used in this literate file are:
```{.bash pipe="tee -a execute.sh"}
#usage: cat content | execute.sh section_identifier
......@@ -10,7 +10,6 @@ String manipulation, the bane
echo "> -- \$$1"
```
```{.bash pipe="tee -a ggplot.sh"}
#usage: cat content | ggplot.sh filename width height
echo '> -- |'
......@@ -20,7 +19,12 @@ String manipulation, the bane
echo "> -- \$$1"
```
We print a warning in the output:
> -- Do not modify. This file has been automatically generated from file literate/tutorial.md,
> -- your changes will be erased.
cabal packaging for the executable file:
```{.hidden pipe="tee -a Tmodule.hs > /dev/null"}
{- cabal:
build-depends:
......@@ -41,10 +45,10 @@ build-depends:
```
> {-| This module serves as an introduction to the HBandit Multi-Armed Bandit library.
> {-| This module serves as an introduction to the `hbandit` Multi-Armed Bandit library.
> -}
>
> module HBandit.Tutorial (
> module Bandit.Tutorial (
> -- *** Setup
>
> -- | The code snippets displayed in this tutorial require the following list of extensions and modules.
......@@ -79,9 +83,9 @@ import System.IO hiding (print)
import Control.Monad.Primitive
import qualified Language.R.Instance as R
import Control.Lens
import HBandit
import HBandit.EpsGreedy
import HBandit.Exp3
import Bandit
import Bandit.EpsGreedy
import Bandit.Exp3
import qualified Data.Text.Lazy.Encoding as T
import qualified Data.Text.Lazy as T
import Data.Aeson hiding ((.=))
......@@ -126,14 +130,14 @@ main = do
> -- types for a basic bandit game between a learner and an environment, where the
> -- learner has access to a random generator and is defined via a stateful 'step'
> -- function. All non-contextual bandit algorithms in this library are instances of this.
> HBandit.Class.Bandit(..)
> Bandit.Class.Bandit(..)
>
> -- *** Example instance: Epsilon-Greedy
> --
> -- | Let's take a look at the instance for the classic fixed-rate \(\epsilon\)-Greedy
> -- algorithm. The necessary hyperparameters are the number of arms and the rate value,
> -- as the 'EpsGreedyHyper' datatype shows.
> ,HBandit.EpsGreedy.EpsGreedyHyper(..)
> ,Bandit.EpsGreedy.EpsGreedyHyper(..)
>
> -- | Let's use that instance on some toy data with a few rounds.
> --
......@@ -159,7 +163,7 @@ onePass :: (Bandit b hyper a l) =>
GameState b a l
onePass hyper g adversary = runGame initialGame
where
(initialBanditState, initialAction, g') = HBandit.init g hyper
(initialBanditState, initialAction, g') = Bandit.init g hyper
initialGame = GameState
{ historyActions = [initialAction],
historyLosses = [],
......@@ -186,7 +190,7 @@ runOnePassEG :: StdGen -> GameState (EpsGreedy Bool) Bool Double
runOnePassEG g = onePass hyper g (getZipList $ f <$> ZipList [40, 2, 10] <*> ZipList [4, 44 ,3] )
where
f a b = \case True -> a; False -> b
hyper = EpsGreedyHyper {epsilon = 0.5, arms = HBandit.Arms [True, False]}
hyper = EpsGreedyHyper {epsilon = 0.5, arms = Bandit.Arms [True, False]}
printOnePassEG :: IO ()
printOnePassEG = putText $
......@@ -204,17 +208,17 @@ printOnePassEG = putText $
```
> -- *** Other classes
> -- | Some other, more restrictive classes are available in [HBandit.Class](HBandit-Class.html) for convenience. See for
> -- example 'HBandit.Class.ParameterFreeMAB', which exposes a hyperparameter-free interface for
> -- | Some other, more restrictive classes are available in [Bandit.Class](Bandit-Class.html) for convenience. See for
> -- example 'Bandit.Class.ParameterFreeMAB', which exposes a hyperparameter-free interface for
> -- algorithms that don't need any information besides the arm count. Those instances are not necessary
> -- per se, and the 'Bandit' class is always sufficient. Note that some instances make aggressive use
> -- of type refinement (See e.g. HBandit.Exp3.Exp3) through the 'Refined' package.
> -- of type refinement (See e.g. Bandit.Exp3.Exp3) through the 'Refined' package.
> -- In particular, we are about to make use of the \(\left[0,1\right]\) interval through the 'ZeroOne'
> -- type alias.
> ,HBandit.Types.ZeroOne
> ,Bandit.Types.ZeroOne
> -- ** Algorithm comparison
> -- | This subsection runs bandit experiments on an example dataset with some of the instances for 'HBandit.Bandit'.
> -- | This subsection runs bandit experiments on an example dataset with some of the @Bandit@ instances.
> -- The data for this tutorial is generated in R using the [inline-r](https://hackage.haskell.org/package/inline-r) package.
> -- Let's define a simple problem with three gaussian arms. We will threshold all cost values to \(\left[0,1\right]\).
......@@ -273,14 +277,14 @@ toAdversary xss = Protolude.transpose xss <&> listToAdversary
exp3 :: [[Double]] -> StdGen -> GameState (Exp3 Int) Int (ZeroOne Double)
exp3 dataset g =
onePass
(HBandit.Arms [0..2])
(Bandit.Arms [0..2])
g
(toAdversary $ refineDataset dataset)
greedy :: [[Double]] -> StdGen -> Double -> GameState (EpsGreedy Int) Int (Double)
greedy dataset g eps =
onePass
(EpsGreedyHyper {epsilon = eps, arms = HBandit.Arms [0..2]})
(EpsGreedyHyper {epsilon = eps, arms = Bandit.Arms [0..2]})
g
(toAdversary dataset)
......@@ -335,21 +339,22 @@ instance Monoid Reducer where
> ) where
> import HBandit.Class
> import HBandit.Types
> import HBandit.EpsGreedy
> import Bandit.Class
> import Bandit.Types
> import Bandit.EpsGreedy
```{.haskell pipe="tee -a main.hs | awk '{print \"> -- \" $0}'"}
pass
```
final code execution:
```{.haskell pipe="sh | awk '{print \"> -- > \" $0}' | sed 's/\> \<//g'"}
set -e
cat main.hs >> Tmodule.hs
cp Tmodule.hs root/literate/main.hs
cd root
echo "module HBandit.Tutorial () where" > src/HBandit/Tutorial.hs
echo "module Bandit.Tutorial () where" > src/Bandit/Tutorial.hs
cabal v2-run literate/main.hs
```
-- |
-- Module : HBandit
-- Module : Bandit
-- Description : The hbandit package
-- Copyright : (c) Argonne National Laboratory, 2019
-- License : MIT
-- Maintainer : fre@freux.fr
module HBandit
( module HBandit.Class,
module HBandit.Types,
module HBandit.Util,
module Bandit
( module Bandit.Class,
module Bandit.Types,
module Bandit.Util,
)
where
import HBandit.Class
import HBandit.Types
import HBandit.Util
import Bandit.Class
import Bandit.Types
import Bandit.Util
{-# LANGUAGE ScopedTypeVariables #-}
-- |
-- Module : HBandit.Class
-- Module : Bandit.Class
-- Copyright : (c) 2019, UChicago Argonne, LLC.
-- License : MIT
-- Maintainer : fre@freux.fr
--
-- This module implements the common interface for instantiating
-- and interacting with Multi-Armed Bandit algorithms.
module HBandit.Class
module Bandit.Class
( -- * Generalized Bandit
Bandit (..),
ExpertRepresentation (..),
......@@ -17,11 +17,10 @@ module HBandit.Class
-- * Discrete Multi-Armed-Bandits
Arms (..),
ParameterFreeMAB (..),
BwCRMAB (..),
)
where
import HBandit.Types
import Bandit.Types
import Protolude
import System.Random
......@@ -56,7 +55,11 @@ class Bandit b hyper a l | b -> l, b -> hyper, b -> a where
-- | @step loss@ iterates the bandit process one step forward.
step :: (RandomGen g, MonadState b m) => g -> l -> m (a, g)
class ContextualBandit b hyper s a l er | b -> l, b -> hyper, b -> s, b -> a, b -> er where
-- | ContextualBandit b hyper s a l er is the class for a contextual bandit algorithm.
-- The same concepts as 'Bandit' apply, with the addition of:
--
-- * @er@ is an expert representation (see 'ExpertRepresentation')
class (ExpertRepresentation er s a) => ContextualBandit b hyper s a l er | b -> l, b -> hyper, b -> s, b -> a, b -> er where
-- | Init hyper returns the initial state of the algorithm
initCtx :: hyper -> b
......@@ -64,9 +67,15 @@ class ContextualBandit b hyper s a l er | b -> l, b -> hyper, b -> s, b -> a, b
-- | @step loss@ iterates the bandit process one step forward.
stepCtx :: (RandomGen g, MonadState b m, Ord a) => g -> l -> s -> m (a, g)
-- | ExpertRepresentation er s a is a distribution over
-- experts.
--
-- @represent er@ returns this distribution encoded as a conditional
-- distribution over actions.
class ExpertRepresentation er s a | er -> s, er -> a where
represent :: er -> (s -> NonEmpty (ZeroOne Double, a))
-- | Arms a represents a set of possible actions.
newtype Arms a = Arms (NonEmpty a)
deriving (Show, Generic)
......@@ -87,25 +96,3 @@ class (Eq a, Bandit b (Arms a) a l) => ParameterFreeMAB b a l | b -> l where
-- value @l@.
stepPFMAB :: (RandomGen g, MonadState b m) => g -> l -> m (a, g)