Commit 6075d1f1 authored by Valentin Reis's avatar Valentin Reis
Browse files

rebased

parent 85fa4da5
Pipeline #11885 failed with stages
in 12 seconds
......@@ -13,52 +13,52 @@ build-type: Simple
source-repository head
library
exposed-modules:
Bandit
Bandit.Class
Bandit.EpsGreedy
Bandit.Exp3
Bandit.UCB
Bandit.Exp4R
Bandit.Types
Bandit.Util
hs-source-dirs: src
default-language: Haskell2010
default-extensions: LambdaCase QuasiQuotes DefaultSignatures
ExistentialQuantification RecordWildCards TypeSynonymInstances
StandaloneDeriving FlexibleInstances TupleSections
MultiParamTypeClasses NoImplicitPrelude OverloadedStrings
OverloadedLists ViewPatterns OverloadedLabels DeriveFunctor
TypeFamilies DeriveAnyClass DeriveGeneric DeriveDataTypeable
DeriveFoldable DerivingStrategies TypeApplications MultiWayIf
TemplateHaskell BlockArguments GADTs FlexibleContexts TypeOperators
DataKinds PolyKinds AllowAmbiguousTypes FunctionalDependencies
UndecidableInstances
ghc-options: -Wall -O0 -Wcompat -Wincomplete-uni-patterns
-Widentities -Wredundant-constraints -Wcpp-undef -fwarn-tabs
-fwarn-unused-imports -fwarn-missing-signatures
-fwarn-name-shadowing -fprint-potential-instances
-fwarn-unused-do-bind -fwarn-wrong-do-bind
-fwarn-incomplete-patterns -Wincomplete-record-updates
-Wmonomorphism-restriction -Wimplicit-prelude
-Wmissing-local-signatures -Wmissing-exported-signatures
-Wmissing-export-lists -Wmissing-home-modules -Widentities
-Wredundant-constraints -Wpartial-fields
build-depends:
base -any,
protolude -any,
random -any,
refined -any,
intervals -any,
MonadRandom -any,
-- list-extras -any,
lens -any,
generic-lens -any
exposed-modules:
Bandit
Bandit.Class
Bandit.EpsGreedy
Bandit.Exp3
Bandit.UCB
Bandit.Exp4R
Bandit.Types
Bandit.Util
hs-source-dirs: src
default-language: Haskell2010
default-extensions: LambdaCase QuasiQuotes DefaultSignatures
ExistentialQuantification RecordWildCards TypeSynonymInstances
StandaloneDeriving FlexibleInstances TupleSections
MultiParamTypeClasses NoImplicitPrelude OverloadedStrings
OverloadedLists ViewPatterns OverloadedLabels DeriveFunctor
TypeFamilies DeriveAnyClass DeriveGeneric DeriveDataTypeable
DeriveFoldable DerivingStrategies TypeApplications MultiWayIf
TemplateHaskell BlockArguments GADTs FlexibleContexts TypeOperators
DataKinds PolyKinds AllowAmbiguousTypes FunctionalDependencies
UndecidableInstances
ghc-options: -Wall -O0 -Wcompat -Wincomplete-uni-patterns
-Widentities -Wredundant-constraints -Wcpp-undef -fwarn-tabs
-fwarn-unused-imports -fwarn-missing-signatures
-fwarn-name-shadowing -fprint-potential-instances
-fwarn-unused-do-bind -fwarn-wrong-do-bind
-fwarn-incomplete-patterns -Wincomplete-record-updates
-Wmonomorphism-restriction -Wimplicit-prelude
-Wmissing-local-signatures -Wmissing-exported-signatures
-Wmissing-export-lists -Wmissing-home-modules -Widentities
-Wredundant-constraints -Wpartial-fields
build-depends:
base -any,
protolude -any,
random -any,
refined -any,
intervals -any,
MonadRandom -any,
lens -any,
generic-lens -any
test-suite discover
type: exitcode-stdio-1.0
main-is: Driver.hs
hs-source-dirs: tests
default-language: Haskell2010
other-modules:
Bandit.UtilTest
Bandit.TypesTest
......
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- |
......@@ -50,26 +51,32 @@ import System.Random
--
-- * @l@ is a superset of admissible losses \(\mathbb{L}\) (statically
-- known).
class Bandit b hyper a l | b -> l, b -> hyper, b -> a where
-- | Init hyper returns the initial state of the algorithm and the
-- first action.
init :: (RandomGen g) => g -> hyper -> (b, a, g)
-- | @step loss@ iterates the bandit process one step forward.
step :: (RandomGen g, MonadState b m) => g -> l -> m (a, g)
-- | ContextualBandit b hyper a l er is the class for a contextual bandit algorithm.
-- The same concepts as 'Bandit' apply, with the addition of:
-- | @Bandit b hyper a l@ packages a bandit algorithm as a record of
-- functions: state type @b@, hyperparameters @hyper@, action type @a@,
-- loss type @l@.
data Bandit b hyper a l
  = Bandit
      { -- | @init g hyper@ returns the initial state of the algorithm and the
        -- first action.
        init :: forall g. (RandomGen g) => g -> hyper -> (b, a, g),
        -- | @step g loss@ iterates the bandit process one step forward.
        step :: forall g m. (RandomGen g, MonadState b m) => g -> l -> m (a, g)
      }
-- | ContextualBandit b hyper s a l er is the record of functions for a
-- contextual bandit algorithm. The same concepts as 'Bandit' apply, with the
-- addition of:
--
-- * @er@ is an expert representation (see 'ExpertRepresentation')
class (ExpertRepresentation er s a) => ContextualBandit b hyper s a l er | b -> l, b -> hyper, b -> s, b -> a, b -> er where
-- | Init hyper returns the initial state of the algorithm
initCtx :: hyper -> b
-- | @step loss@ iterates the bandit process one step forward.
stepCtx :: (RandomGen g, MonadState b m, Ord a) => g -> l -> s -> m (a, g)
data ContextualBandit b hyper s a l er
  = ContextualBandit
      { -- | @initCtx hyper@ returns the initial state of the algorithm.
        initCtx :: hyper -> b,
        -- | @stepCtx g loss context@ iterates the bandit process one step
        -- forward, observing the context @s@ before choosing an action.
        stepCtx ::
          forall g m.
          (RandomGen g, MonadState b m, Ord a) =>
          g ->
          l ->
          s ->
          m (a, g)
      }
-- | ExpertRepresentation er s a is a representation that can be cast
-- into a distribution over actions.
......
......@@ -11,6 +11,7 @@
-- with probability \(1-\epsilon\).
module Bandit.EpsGreedy
( EpsGreedy (..),
epsGreedy,
Weight (..),
Screening (..),
EpsGreedyHyper (..),
......@@ -57,10 +58,7 @@ data Screening a
deriving (Show, Generic)
-- | The sampling procedure has started.
newtype ExploreExploit a
= ExploreExploit
{ weights :: NonEmpty (Weight a)
}
newtype ExploreExploit a = ExploreExploit {weights :: NonEmpty (Weight a)}
deriving (Show, Generic)
-- | The information maintaining structure for one action.
......@@ -86,53 +84,61 @@ data EpsGreedyHyper a r
-- | The variable rate \(\epsilon\)-Greedy MAB algorithm.
-- Offers no interesting guarantees, works well in practice.
instance (Rate r, Eq a) => Bandit (EpsGreedy a r) (EpsGreedyHyper a r) a Double where
init g (EpsGreedyHyper r (Arms (a :| as))) =
( EpsGreedy
{ t = 1,
rate = r,
lastAction = a,
params = InitialScreening $
Screening
{ screened = [],
screenQueue = as
}
},
a,
g
)
step g l = do
oldAction <- use #lastAction
schedule <- use #rate <&> toRate
e <- use #t <&> schedule
(a, newGen) <- use #params >>= \case
InitialScreening sg ->
case screenQueue sg of
(a : as) -> do
#params
.= InitialScreening
( Screening
{ screened = (l, oldAction) : screened sg,
screenQueue = as
}
)
return (a, g)
[] -> do
let ee =
ExploreExploit
{ weights = toW <$> ((l, oldAction) :| screened sg)
}
#params .= Started ee
pickreturn e g ee
Started ee -> do
let ee' = ee & #weights %~ updateWeight oldAction l
#params . #_Started .= ee'
pickreturn e g ee
#lastAction .= a
#t += 1
return (a, newGen)
-- | The variable-rate \(\epsilon\)-Greedy MAB algorithm packaged as a
-- 'Bandit' record of functions.  Offers no interesting guarantees, but
-- works well in practice.
epsGreedy ::
  (Eq a, Rate r) =>
  Bandit
    (EpsGreedy a r)
    (EpsGreedyHyper a r)
    a
    Double
epsGreedy =
  Bandit
    { -- Initial state: start a screening pass over all arms, playing the
      -- first arm @a@ and queueing the rest.
      init = \g (EpsGreedyHyper r (Arms (a :| as))) ->
        ( EpsGreedy
            { t = 1,
              rate = r,
              lastAction = a,
              params = InitialScreening $
                Screening
                  { screened = [],
                    screenQueue = as
                  }
            },
          a,
          g
        ),
      -- One step: record the loss @l@ for the last action, then either
      -- continue the initial screening pass or apply the explore/exploit
      -- rule with rate \(\epsilon\) given by @schedule t@.
      step = \g l -> do
        oldAction <- use #lastAction
        schedule <- use #rate <&> toRate
        e <- use #t <&> schedule
        (a, newGen) <-
          use #params >>= \case
            InitialScreening sg ->
              case screenQueue sg of
                -- Still screening: play the next queued arm.
                (a : as) -> do
                  #params
                    .= InitialScreening
                      ( Screening
                          { screened = (l, oldAction) : screened sg,
                            screenQueue = as
                          }
                      )
                  return (a, g)
                -- Screening finished: switch to the explore/exploit phase
                -- with one weight per screened arm.
                [] -> do
                  let ee =
                        ExploreExploit
                          { weights = toW <$> ((l, oldAction) :| screened sg)
                          }
                  #params .= Started ee
                  pickreturn e g ee
            Started ee -> do
              let ee' = ee & #weights %~ updateWeight oldAction l
              #params . #_Started .= ee'
              -- BUG FIX: sample from the freshly updated weights @ee'@.
              -- The original passed the stale @ee@, silently discarding
              -- the loss that was just folded in; the analogous UCB
              -- implementation correctly uses @ee'@ here.
              pickreturn e g ee'
        #lastAction .= a
        #t += 1
        return (a, newGen)
    }
-- | Action selection and return
pickreturn ::
......@@ -142,9 +148,10 @@ pickreturn ::
ExploreExploit b ->
m (b, g)
pickreturn eps g eeg = do
let (a, g') = runRand (MR.fromList [(True, toRational eps), (False, toRational $ 1 - eps)]) g & \case
(True, g'') -> pickRandom eeg g''
(False, g'') -> (action $ minimumBy (\(averageLoss -> a1) (averageLoss -> a2) -> compare a1 a2) (weights eeg), g'')
let (a, g') =
runRand (MR.fromList [(True, toRational eps), (False, toRational $ 1 - eps)]) g & \case
(True, g'') -> pickRandom eeg g''
(False, g'') -> (action $ minimumBy (\(averageLoss -> a1) (averageLoss -> a2) -> compare a1 a2) (weights eeg), g'')
return (a, g')
-- | Random action selection primitive
......@@ -160,11 +167,12 @@ pickRandom ExploreExploit {..} =
-- | Online mean accumulator (incremental running-average update).
updateAvgLoss :: Double -> Weight a -> Weight a
updateAvgLoss x w = w &~ do
#hits += 1
n <- use #hits <&> fromIntegral
avg <- use #averageLoss
#averageLoss += (x - avg) / (n + 1)
updateAvgLoss x w =
w &~ do
#hits += 1
n <- use #hits <&> fromIntegral
avg <- use #averageLoss
#averageLoss += (x - avg) / (n + 1)
-- | updating the weights
updateWeight ::
......
......@@ -13,6 +13,7 @@
module Bandit.Exp3
( -- * State
Exp3 (..),
exp3,
-- * Internal
Weight (..),
......@@ -59,34 +60,35 @@ data Weight a
deriving (Show, Generic)
-- | The Exponential-weight algorithm for Exploration and Exploitation (EXP3).
instance
(Eq a) =>
Bandit (Exp3 a) (Arms a) a (ZeroOne Double)
where
init g (Arms as) =
( Exp3
{ t = 1,
lastAction = a,
k = length as,
weights = ws
},
a,
g'
)
where
awl = as <&> (Bandit.Types.one,)
(a, g') = sampleWL awl g
ws = as <&> Weight (Probability $ 1.0 / fromIntegral (length (toList as))) (CumulativeLoss 0)
step g (R.unrefine -> l) = do
oldAction <- use #lastAction
#weights %= fmap (\w -> if action w == oldAction then updateCumLoss l w else w)
t <- use #t
k <- use #k
#weights %= recompute t k
#t += 1
pickAction g
-- | The Exponential-weight algorithm for Exploration and Exploitation
-- (EXP3), packaged as a 'Bandit' record of functions.
exp3 :: (Eq a) => Bandit (Exp3 a) (Arms a) a (ZeroOne Double)
exp3 =
  Bandit
    { -- Initial state: one weight per arm, each with probability
      -- 1/k (k = number of arms) and zero cumulative loss; the first
      -- action is sampled from the uniform weighted list @awl@.
      init = \g (Arms as) ->
        let awl = as <&> (Bandit.Types.one,)
            (a, g') = sampleWL awl g
            ws =
              as
                <&> Weight
                  (Probability $ 1.0 / fromIntegral (length (toList as)))
                  (CumulativeLoss 0)
         in ( Exp3
                { t = 1,
                  lastAction = a,
                  k = length as,
                  weights = ws
                },
              a,
              g'
            ),
      -- One EXP3 round: charge the observed loss @l@ to the arm that was
      -- just played, recompute the weight distribution from @t@ and @k@,
      -- advance the round counter, and sample the next action.
      step = \g (R.unrefine -> l) -> do
        oldAction <- use #lastAction
        #weights %= fmap (\w -> if action w == oldAction then updateCumLoss l w else w)
        t <- use #t
        k <- use #k
        #weights %= recompute t k
        #t += 1
        pickAction g
    }
pickAction :: (RandomGen g, MonadState (Exp3 a) m) => g -> m (a, g)
pickAction g = do
......
......@@ -15,6 +15,7 @@
-- Conference on Machine Learning, in PMLR 70:3280-3288
module Bandit.Exp4R
( -- * Interface
exp4r,
Feedback (..),
-- * State
......@@ -53,7 +54,7 @@ data Exp4R s a er
k :: Int,
n :: Int,
lambda :: R.Refined R.NonNegative Double,
constraint :: Double,
constraint :: R.Refined R.NonPositive Double,
experts ::
NonEmpty
( ZeroOne Double,
......@@ -83,54 +84,56 @@ data Feedback
data Exp4RCfg s a er
= Exp4RCfg
{ expertsCfg :: NonEmpty er,
constraintCfg :: Double,
constraintCfg :: R.Refined R.NonPositive Double,
horizonCfg :: R.Refined R.Positive Int,
as :: NonEmpty a
}
deriving (Generic)
instance
-- | The contextual exponential-weight algorithm for Exploration and
-- Exploitation with Experts and Risk Constraints (EXP4R).
exp4r ::
(Eq a, ExpertRepresentation er s a) =>
ContextualBandit (Exp4R s a er) (Exp4RCfg s a er) s a (Maybe Feedback) er
where
initCtx Exp4RCfg {..} =
Exp4R
{ t = 1,
lastAction = Nothing,
k = NE.length as,
n = NE.length expertsCfg,
lambda = lambdaInitial,
constraint = constraintCfg,
horizon = horizonCfg,
experts = (R.unsafeRefine (1 / fromIntegral (NE.length expertsCfg)),) <$> expertsCfg
}
stepCtx g feedback s = do
weightedAdvice <- use #experts <&> fmap (fmap (($ s) . toExpert))
lastAction <- use #lastAction
fromMaybe
pass
(update weightedAdvice <$> lastAction <*> feedback)
let armDistribution :: NonEmpty (ZeroOne Double, a)
armDistribution =
fromMaybe
(panic "internal Exp4R algorithm failure: distribution normalization failed.")
(combineAdvice weightedAdvice)
(a, g') = sampleWL armDistribution g
p_a =
maybe
(panic "internal Exp4R algorithm failure: arm pull issue.")
fst
(find (\x -> snd x == a) armDistribution)
probabilityOf_a :: NonEmpty (ZeroOne Double)
probabilityOf_a = snd <$> weightedAdvice <&> \e ->
maybe
(panic "internal Exp4R algorithm failure: weight computation")
fst
(find (\x -> snd x == a) e)
#lastAction ?= LastAction a p_a probabilityOf_a
return (a, g')
exp4r =
  ContextualBandit
    { -- Initial state: uniform weight 1/n over the n configured experts;
      -- no action has been played yet (lastAction = Nothing).
      initCtx = \Exp4RCfg {..} ->
        Exp4R
          { t = 1,
            lastAction = Nothing,
            k = NE.length as,
            n = NE.length expertsCfg,
            lambda = lambdaInitial,
            constraint = constraintCfg,
            horizon = horizonCfg,
            experts = (R.unsafeRefine (1 / fromIntegral (NE.length expertsCfg)),) <$> expertsCfg
          },
      -- One EXP4R round: query every expert on the context @s@, fold the
      -- feedback for the previous action into the expert weights, combine
      -- the weighted advice into a single arm distribution, sample an arm,
      -- and remember it together with its pull probabilities.
      stepCtx = \g feedback s -> do
        weightedAdvice <- use #experts <&> fmap (fmap (($ s) . toExpert))
        lastAction <- use #lastAction
        -- No-op when either @lastAction@ or @feedback@ is Nothing
        -- (i.e. on the very first round).
        fromMaybe
          pass
          (update weightedAdvice <$> lastAction <*> feedback)
        let armDistribution =
              fromMaybe
                (panic "internal Exp4R algorithm failure: distribution normalization failed.")
                (combineAdvice weightedAdvice)
            (a, g') = sampleWL armDistribution g
            -- Probability with which the sampled arm @a@ was pulled.
            p_a =
              maybe
                (panic "internal Exp4R algorithm failure: arm pull issue.")
                fst
                (find (\x -> snd x == a) armDistribution)
            -- For each expert, the probability it assigned to arm @a@.
            probabilityOf_a :: NonEmpty (ZeroOne Double)
            probabilityOf_a =
              snd <$> weightedAdvice <&> \e ->
                maybe
                  (panic "internal Exp4R algorithm failure: weight computation")
                  fst
                  (find (\x -> snd x == a) e)
        #lastAction ?= LastAction a p_a probabilityOf_a
        return (a, g')
    }
update ::
(MonadState (Exp4R s a er) m) =>
......@@ -145,7 +148,7 @@ update
lam <- R.unrefine <$> use #lambda
delta <- get <&> mkDelta
mu <- get <&> mkMu
beta <- use #constraint
beta <- R.unrefine <$> use #constraint
let numeratorTerm (R.unrefine -> w, _) p =
w * exp (- mu * (p * (lam * r + c) / p_a))
wUpdate = NE.zipWith numeratorTerm weightedAdvice pPolicy_a
......@@ -164,14 +167,17 @@ combineAdvice ::
(Ord a) =>
NonEmpty (ZeroOne Double, NonEmpty (ZeroOne Double, a)) ->
Maybe (NonEmpty (ZeroOne Double, a))
combineAdvice weightedAdvice = normalizeDistribution $
groupAllWith1 snd dirtyArmDistribution
<&> \gs -> (getSum $ sconcat (gs <&> Sum . fst), snd $ NE.head gs)
combineAdvice weightedAdvice =
normalizeDistribution $
groupAllWith1 snd dirtyArmDistribution
<&> \gs -> (getSum $ sconcat (gs <&> Sum . fst), snd $ NE.head gs)
where
dirtyArmDistribution = sconcat $
weightedAdvice
<&> \(wi, advices) -> advices
<&> \(p, ai) -> (R.unrefine p * R.unrefine wi, ai)
dirtyArmDistribution =
sconcat $
weightedAdvice
<&> \(wi, advices) ->
advices
<&> \(p, ai) -> (R.unrefine p * R.unrefine wi, ai)
-- | \( \mu = \sqrt{\frac{\ln N }{ (T(K+4))}} \)
mkMu :: Exp4R s a er -> Double
......
......@@ -9,6 +9,7 @@
-- This module implements the UCB family of algorithms.
module Bandit.UCB
( UCB (..),
ucb,
UCBHyper (..),
hyperAlphaUCB,
hyperUCB1,
......@@ -57,53 +58,55 @@ toW (loss, action) = Weight loss 1 action
-- | The UCB family of MAB algorithms, parameterized by an 'InvLFPhi'
-- instance @p@.
instance (InvLFPhi p, Eq a) => Bandit (UCB a p) (UCBHyper a p) a (ZeroOne Double) where
init g (UCBHyper invLFPhiUCB alphaUCB (Arms (a :| as))) =
( UCB
{ t = 1,
alpha = alphaUCB,
invLFPhi = invLFPhiUCB,
lastAction = a,
params = InitialScreening $
Screening
{ screened = [],
screenQueue = as
}
},
a,
g
)
step g (unrefine . rewardCostBijection -> l) = do
oldAction <- use #lastAction
invLFPhiFunc <- use #invLFPhi <&> toInvLFPhi
alphaValue <- use #alpha
#t += 1
t <- use #t
a <- use #params >>= \case
InitialScreening sg ->
case screenQueue sg of
(a : as) -> do
#params . #_InitialScreening
.= Screening
{ screened = (l, oldAction) : screened sg,
screenQueue = as
}
return a
[] -> do
let ee =
ExploreExploit
{ weights = toW <$> ((l, oldAction) :| screened sg)
}
#params .= Started ee
pickreturn t invLFPhiFunc alphaValue ee
Started ee -> do
let ee' = ee & #weights %~ updateWeight oldAction l
#params .= Started ee'
pickreturn t invLFPhiFunc alphaValue ee'
#lastAction .= a
return (a, g)
ucb :: (Eq a, InvLFPhi p) => Bandit (UCB a p) (UCBHyper a p) a (ZeroOne Double)
ucb =
Bandit
{ init = \g (UCBHyper invLFPhiUCB alphaUCB (Arms (a :| as))) ->
( UCB
{ t = 1,
alpha = alphaUCB,
invLFPhi = invLFPhiUCB,
lastAction = a,
params = InitialScreening $
Screening
{ screened = [],
screenQueue = as
}
},
a,
g
),
step = \g (unrefine . rewardCostBijection -> l) -> do
oldAction <- use #lastAction
invLFPhiFunc <- use #invLFPhi <&> toInvLFPhi
alphaValue <- use #alpha