Commit fa36f21c authored by Valentin Reis's avatar Valentin Reis
Browse files

adds readme

parent 087ec3ae
Pipeline #10322 failed with stages
in 61 minutes and 39 seconds
......@@ -42,6 +42,13 @@ nix:package:
script: nix-build -A hbandit --no-build-output
# CI job: regenerate README.md through the Makefile. make itself is run
# inside a nix-shell that provides gnumake.
make:readme:
stage: build
tags:
- kvm
- nix
script: nix-shell -p gnumake --run "make README.md"
make:doc:
stage: build
tags:
......
......@@ -176,6 +176,7 @@ src/Bandit/Tutorial.hs: literate/tutorial.md hbandit.nix src
aeson
pretty-simple
panhandle
pandoc-citeproc
panpipe
unlit
pandoc
......@@ -188,6 +189,29 @@ src/Bandit/Tutorial.hs: literate/tutorial.md hbandit.nix src
pandoc --filter $$(which panpipe) --filter $$(which panhandle) -f markdown+lhs -t markdown+lhs $< | unlit -f bird > $@
'
# Render README.md from its literate source with pandoc and pandoc-citeproc.
#
# literate/readme.md must remain the FIRST prerequisite: the recipe reads its
# input through $<. The bibliography and the CSL style named in the front
# matter of literate/readme.md are listed as prerequisites as well, so that
# editing either of them also triggers a rebuild of README.md.
#
# $$ is make's escape for a literal $, so nix and bash receive ${...} / $(...).
README.md: literate/readme.md literate/refs.bib literate/acm.csl
	@nix-shell --pure -E '
		with import <nixpkgs> {};
		with haskellPackages;
		mkShell {
			name="pandoc-tools";
			buildInputs = [
				inline-r
				aeson
				pretty-simple
				panhandle
				pandoc-citeproc
				panpipe
				pandoc
				pkgs.which
				cabal-install
				R];
			R_LIBS_SITE = "$${builtins.readFile r-libs-site}";
		}
	' --run bash <<< '
		pandoc -t markdown_strict --filter $$(which pandoc-citeproc) -s $< -o $@
	'
.PHONY:clean
clean:
rm -rf .build
......
# hbandit
hbandit
=======
Safe Multi-Armed Bandit implementations: EXP3, BwCR (wip), Exp4.R
Safe Multi-Armed Bandit implementations:
- Eps-Greedy (fixed rate)
- EXP3 (hyperparameter-free rate from \[[1](#ref-bubeck2012regret)\])
- Exp4.R \[[2](#ref-sun2017safety)\]
<!-- vim: set ft=markdown.pandoc cole=0: -->
\[1\] Bubeck, S. et al. 2012. Regret analysis of stochastic and
nonstochastic multi-armed bandit problems. *Foundations and Trends in
Machine Learning*. 5, 1 (2012), 1–122.
\[2\] Sun, W. et al. 2017. Safety-aware algorithms for adversarial
contextual bandit. *Proceedings of the 34th international conference on
machine learning-volume 70* (2017), 3280–3288.
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" default-locale="en-US">
<info>
<title>ACM SIG Proceedings ("et al." for 3+ authors)</title>
<id>http://www.zotero.org/styles/acm-sig-proceedings</id>
<link href="http://www.zotero.org/styles/acm-sig-proceedings" rel="self"/>
<link href="http://www.acm.org/sigs/publications/proceedings-templates" rel="documentation"/>
<author>
<name>Naeem Esfahani</name>
<email>nesfaha2@gmu.edu</email>
<uri>http://mason.gmu.edu/~nesfaha2/</uri>
</author>
<contributor>
<name>Chris Horn</name>
<email>chris.horn@securedecisions.com</email>
</contributor>
<contributor>
<name>Patrick O'Brien</name>
</contributor>
<category citation-format="numeric"/>
<category field="science"/>
<category field="engineering"/>
<updated>2017-07-15T11:28:14+00:00</updated>
<rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
</info>
<!-- Macro "author": for webpage items, prints the title followed by ":".
     For every other type, prints the author names (all in sort order,
     initials terminated by "."), followed by a short role label; when there
     is no author, the editor and then the translator are substituted. -->
<macro name="author">
<choose>
<if type="webpage">
<text variable="title" suffix=":"/>
</if>
<else>
<names variable="author">
<name name-as-sort-order="all" and="text" sort-separator=", " initialize-with="." delimiter-precedes-last="never" delimiter=", "/>
<label form="short" prefix=" "/>
<substitute>
<names variable="editor"/>
<names variable="translator"/>
</substitute>
</names>
</else>
</choose>
</macro>
<!-- Macro "editor": editor names with initials, joined by ", " and a textual
     "and", followed by the short role label prefixed with ", ". -->
<macro name="editor">
<names variable="editor">
<name initialize-with="." delimiter=", " and="text"/>
<label form="short" prefix=", "/>
</names>
</macro>
<!-- Macro "access": appends the DOI as a https://doi.org/ link, for journal
     articles only; every other item type gets nothing. -->
<macro name="access">
<choose>
<if type="article-journal" match="any">
<text variable="DOI" prefix=". DOI:https://doi.org/"/>
</if>
</choose>
</macro>
<!-- In-text citations: numeric, sorted by citation number, wrapped in
     square brackets and separated by ", "; collapsing is keyed on the
     citation number. -->
<citation collapse="citation-number">
<sort>
<key variable="citation-number"/>
</sort>
<layout prefix="[" suffix="]" delimiter=", ">
<text variable="citation-number"/>
</layout>
</citation>
<!-- Bibliography: entries are sorted by the "author" macro, then by title.
     Each entry is "[n]" + authors + year, then a type-specific body chosen
     by the <choose> below, then the "access" macro (DOI), closed by ".".
     "et al." is used from 3 names on, keeping only the first name. -->
<bibliography entry-spacing="0" second-field-align="flush" et-al-min="3" et-al-use-first="1">
<sort>
<key macro="author"/>
<key variable="title"/>
</sort>
<layout suffix=".">
<text variable="citation-number" prefix="[" suffix="]"/>
<text macro="author" suffix=" "/>
<date variable="issued" suffix=". ">
<date-part name="year"/>
</date>
<choose>
<!-- Conference papers: title, italic proceedings title, (place, date), pages. -->
<if type="paper-conference">
<group delimiter=". ">
<text variable="title"/>
<group delimiter=" ">
<text variable="container-title" font-style="italic"/>
<group delimiter=", ">
<group delimiter=", " prefix="(" suffix=")">
<text variable="publisher-place"/>
<date variable="issued">
<date-part name="month" form="short" suffix=" "/>
<date-part name="year"/>
</date>
</group>
<text variable="page"/>
</group>
</group>
</group>
</if>
<!-- Journal articles: title, italic journal title, volume, issue (date), pages. -->
<else-if type="article-journal">
<group delimiter=". ">
<text variable="title"/>
<text variable="container-title" font-style="italic"/>
<group delimiter=", ">
<text variable="volume"/>
<group delimiter=" ">
<text variable="issue"/>
<date variable="issued" prefix="(" suffix=")">
<date-part name="month" form="short" suffix=" "/>
<date-part name="year"/>
</date>
</group>
<text variable="page"/>
</group>
</group>
</else-if>
<else-if type="patent">
<group delimiter=". ">
<text variable="title"/>
<text variable="number"/>
<date variable="issued">
<date-part name="month" form="short" suffix=" "/>
<date-part name="day" suffix=", "/>
<date-part name="year"/>
</date>
</group>
</else-if>
<else-if type="thesis">
<group delimiter=". ">
<text variable="title" font-style="italic"/>
<text variable="archive_location" prefix="Doctoral Thesis #"/>
<text variable="publisher"/>
</group>
</else-if>
<else-if type="report">
<group delimiter=". ">
<text variable="title" font-style="italic"/>
<text variable="number" prefix="Technical Report #"/>
<text variable="publisher"/>
</group>
</else-if>
<!-- Web pages: italic URL and the access date as YYYY-MM-DD. -->
<else-if type="webpage">
<group delimiter=". ">
<text variable="URL" font-style="italic"/>
<date variable="accessed" prefix="Accessed: ">
<date-part name="year" suffix="-"/>
<date-part name="month" form="numeric-leading-zeros" suffix="-"/>
<date-part name="day" form="numeric-leading-zeros"/>
</date>
</group>
</else-if>
<!-- NOTE(review): "paper-conference" is already matched by the first <if>
     above, so in practice only "chapter" items reach this branch. -->
<else-if type="chapter paper-conference" match="any">
<group delimiter=". ">
<text variable="title"/>
<text variable="container-title" font-style="italic"/>
<text macro="editor"/>
<text variable="publisher"/>
<text variable="page"/>
</group>
</else-if>
<!-- NOTE(review): "report" is already matched by its dedicated branch
     above, so it never reaches this one. -->
<else-if type="bill book graphic legal_case legislation motion_picture report song" match="any">
<group delimiter=". ">
<text variable="title" font-style="italic"/>
<text variable="publisher"/>
</group>
</else-if>
<!-- Fallback for every remaining item type. -->
<else>
<group delimiter=". ">
<text variable="title"/>
<text variable="container-title" font-style="italic"/>
<text variable="publisher"/>
</group>
</else>
</choose>
<text macro="access"/>
</layout>
</bibliography>
</style>
---
bibliography: literate/refs.bib
csl: literate/acm.csl
link-citations: true
---
# hbandit
Safe Multi-Armed Bandit implementations:
- Eps-Greedy (fixed rate)
- EXP3 (hyperparameter-free rate from @bubeck2012regret)
- Exp4.R @sun2017safety
<!-- vim: set ft=markdown.pandoc cole=0: -->
@article{bubeck2012regret,
title={Regret analysis of stochastic and nonstochastic multi-armed bandit problems},
author={Bubeck, S{\'e}bastien and Cesa-Bianchi, Nicolo and others},
journal={Foundations and Trends{\textregistered} in Machine Learning},
volume={5},
number={1},
pages={1--122},
year={2012},
publisher={Now Publishers, Inc.}
}
@inproceedings{sun2017safety,
title={Safety-aware algorithms for adversarial contextual bandit},
author={Sun, Wen and Dey, Debadeepta and Kapoor, Ashish},
booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70},
pages={3280--3288},
year={2017},
organization={JMLR.org}
}
literate/regretPlot.png

71 KB | W: | H:

literate/regretPlot.png

66.6 KB | W: | H:

literate/regretPlot.png
literate/regretPlot.png
literate/regretPlot.png
literate/regretPlot.png
  • 2-up
  • Swipe
  • Onion skin
literate/summaryPlot.png

8.03 KB | W: | H:

literate/summaryPlot.png

7.64 KB | W: | H:

literate/summaryPlot.png
literate/summaryPlot.png
literate/summaryPlot.png
literate/summaryPlot.png
  • 2-up
  • Swipe
  • Onion skin
{-# LANGUAGE DerivingVia #-}
-- |
-- Module : Bandit.BwCR
-- Copyright : (c) 2019, UChicago Argonne, LLC.
-- License : MIT
-- Maintainer : fre@freux.fr
--
-- Bandits with convex knapsacks and concave rewards (BwCR). See [1]
--
-- The parameters from the paper are:
-- Convex set \(S \subseteq [0,1]^d\).
-- Convex loss function \(f : [0,1]^d \to [0,1]\) (written as concave objective in the paper).
-- \(m\) arms.
--
-- Here, we implement the special LP case.
-- This means that the case of \(k\) linearly-combined rewards
-- (with weights \(\lambda\)) and \(c\) constraints (so that \(d = c+k\)) is
-- implemented.
-- More precisely, \(f(X) = \sum_{i=1..k} \lambda_i X_i\) and
-- \(S = [0,1]^k \times [a_1,b_1] \times \ldots \times [a_c,b_c]\).
--
-- In this restricted case, the minimization (maximization in the original paper)
-- inside the bandit iteration is an LP with k variables and c constraints. This
-- LP is solved via GLPK bindings.
--
-- - [1] Bandits with Global Convex Constraints and Objective,
-- Shipra Agrawal, Nikhil R. Devanur. https://pubsonline.informs.org/doi/abs/10.1287/opre.2019.1840
module Bandit.BwCR
( BwCR (..),
BwCRHyper (..),
ScreeningBwCR (..),
UCBBwCR (..),
Weight (..),
T (..),
mkHyper,
)
where
import Bandit.Class
import Bandit.Types
import Numeric.Interval
import Protolude
-- | Horizon wrapper: a @T@ carries the total number of rounds as a 'Double'.
newtype T
  = T Double
  deriving (Generic, Eq)
-- | State of the BwCR bandit. It starts in the screening phase and later
-- switches to the UCB phase.
data BwCR a l
  = Screening (ScreeningBwCR a l)
  -- ^ Still screening the arms for initial estimates.
  | UCB (UCBBwCR a l)
  -- ^ The UCB sampling procedure has started.
  deriving (Generic)
-- | Screening-phase component of the BwCR state.
data ScreeningBwCR a l = ScreeningBwCR
  { tScreening :: Int
    -- ^ Round counter: set to 1 on initialisation, incremented by every
    -- screening step.
  , screening :: a
    -- ^ Action currently being screened, i.e. the one last handed out.
  , screened :: [([ZeroOne Double], a)]
    -- ^ Actions already screened, each paired with the loss that was
    -- reported for it; the most recent one comes first.
  , screenQueue :: [a]
    -- ^ Actions that still have to be screened.
  , screeningHyper :: BwCRHyper a l
    -- ^ Hyperparameters; handed over to the UCB state once screening ends.
  }
  deriving (Generic)
-- | Bookkeeping record kept for a single action.
data Weight a = Weight
  { cumulativeEstimates :: [ZeroOne Double]
    -- ^ Estimates attached to the action; seeded with the loss observed
    -- for it during screening.
  , hits :: Int
    -- ^ Counter seeded with 1 when screening ends. Presumably the number of
    -- times the action has been played -- TODO confirm.
  , action :: a
    -- ^ The action this record belongs to.
  }
  deriving (Generic)
-- | UCB-phase component of the BwCR state.
data UCBBwCR a l = UCBBwCR
  { t :: Int
    -- ^ Round counter; starts at the screening counter plus one.
  , lastAction :: a
    -- ^ Action most recently handed out.
  , k :: Int
    -- ^ Number of screened actions, recorded when screening ends.
  , weights :: NonEmpty (Weight a)
    -- ^ One 'Weight' record per screened action.
  , ucbHyper :: BwCRHyper a l
    -- ^ Hyperparameters inherited from the screening phase.
  }
  deriving (Generic)
-- | Build the BwCR hyperparameters for the bound of the original paper that
-- holds with probability \(1-\delta\).
--
-- @mkHyper delta (T totalRounds) arms constraints objectiveWeights@ stores
-- the last three arguments unchanged and sets
-- @gamma = log (totalRounds * #arms * #objectiveWeights / delta)@.
mkHyper ::
  Double ->
  T ->
  Arms a ->
  Interval [ZeroOne Double] ->
  [ZeroOne Double] ->
  BwCRHyper a [ZeroOne Double]
mkHyper delta (T totalRounds) arms@(Arms nelarms) constraints objectiveWeights =
  BwCRHyper
    { gamma = log (totalRounds * armCount * weightCount / delta),
      arms = arms,
      constraints = constraints,
      objectiveWeights = objectiveWeights
    }
  where
    -- Number of arms and number of objective weights, as floating-point
    -- factors of the logarithm's argument.
    armCount = fromIntegral (length (toList nelarms))
    weightCount = fromIntegral (length objectiveWeights)
-- | Hyperparameters of the BwCR algorithm.
data BwCRHyper a l = BwCRHyper
  { gamma :: Double
    -- ^ The parameter \(\gamma\) of the confidence radius \(rad\) used in
    -- the original paper.
  , arms :: Arms a
    -- ^ The arms the bandit chooses from.
  , constraints :: Interval l
    -- ^ Interval describing the constraints.
  , objectiveWeights :: l
    -- ^ Weights of the linear objective.
  }
  deriving (Generic)
-- | The Bandits with Concave Rewards and Convex Knapsacks (BwCR) algorithm.
--
-- @init@ starts the screening phase on the first arm and queues the others.
-- @step@ records the reported loss for the arm being screened and moves on
-- to the next queued arm; once the queue is empty it builds the UCB state.
instance Bandit (BwCR a [ZeroOne Double]) (BwCRHyper a [ZeroOne Double]) a [ZeroOne Double] where
  init gen hyper@(BwCRHyper _ (Arms (firstArm :| otherArms)) _ _) =
    (Screening firstRound, firstArm, gen)
    where
      firstRound =
        ScreeningBwCR
          { tScreening = 1,
            screening = firstArm,
            screened = [],
            screenQueue = otherArms,
            screeningHyper = hyper
          }

  step gen loss = do
    current <- get
    case current of
      -- Already in the UCB phase: the reported loss is not used here.
      UCB ucbState -> pickreturn ucbState gen
      Screening scr ->
        case screenQueue scr of
          -- Every arm has been tried once: switch to the UCB phase.
          [] -> pickreturn ucbStart gen
          nextArm : laterArms -> do
            put . Screening $
              scr
                { tScreening = tScreening scr + 1,
                  screening = nextArm,
                  screened = observed : screened scr,
                  screenQueue = laterArms
                }
            pure (nextArm, gen)
        where
          -- Loss just reported, paired with the arm it was reported for.
          observed = (loss, screening scr)
          ucbStart =
            UCBBwCR
              { t = tScreening scr + 1,
                lastAction = screening scr,
                k = length (screened scr) + 1,
                weights = toWeight <$> (observed :| screened scr),
                ucbHyper = screeningHyper scr
              }
          toWeight :: ([ZeroOne Double], a) -> Weight a
          toWeight (estimates, act) =
            Weight {cumulativeEstimates = estimates, hits = 1, action = act}
-- | Action selection primitive. It hands back the 'lastAction' stored in
-- the UCB state; no other field is consulted.
pickAction :: UCBBwCR a l -> a
pickAction = lastAction
-- | Select an action with 'pickAction', store the UCB state (with that
-- action recorded as 'lastAction') as the new bandit state, and return the
-- action together with the second argument, which is passed through as is.
pickreturn ::
  (MonadState (BwCR a l) m) =>
  UCBBwCR a l ->
  g ->
  m (a, g)
pickreturn ucbState gen =
  put (UCB (ucbState {lastAction = chosen})) >> pure (chosen, gen)
  where
    chosen = pickAction ucbState
-- | The BwCR algorithm with a linear combination as objective. Under IID data sources,
-- this offers, for any \(\delta > 0\), the following regret bounds with probability \(1-\delta\):
-- \[
-- R_{objective}(T) = O (\|\lambda\| d \sqrt{\frac{\gamma m}{T}} )
-- \]
-- \[
-- R_{constraints}(T) = O ( d \sqrt{\frac{\gamma m}{T}} )
-- \]
-- where \(\gamma = O (\log(\frac{mTd}{\delta}))\).
instance (Eq a) => BwCRMAB (BwCR a [ZeroOne Double]) a (BwCRHyper a) [ZeroOne Double]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment