Commit cdaec3f1 authored by Brice Videau
Browse files

Added roulette sampling distribution.

parent 48cb4821
......@@ -124,5 +124,5 @@ m4/lt~obsolete.m4
Makefile
#Misc
build
build*
src/config.h.in
......@@ -8,6 +8,7 @@ extern "C" {
/* Identifies the kind of a distribution object (the value reported by
 * ccs_distribution_get_type()). */
enum ccs_distribution_type_e {
CCS_UNIFORM,
CCS_NORMAL,
/* Weighted discrete distribution over integer indices, created with
 * ccs_create_roulette_distribution(). */
CCS_ROULETTE,
/* NOTE(review): presumably the number of real distribution types, since it
 * directly follows the last one - confirm before relying on it. */
CCS_DISTRIBUTION_TYPE_MAX,
/* NOTE(review): INT_MAX-valued sentinel; by its name it is there to force
 * the enum to a 32-bit representation - confirm. */
CCS_DISTRIBUTION_TYPE_FORCE_32BIT = INT_MAX
};
......@@ -68,6 +69,11 @@ ccs_create_uniform_float_distribution(ccs_float_t lower,
ccs_float_t quantization,
ccs_distribution_t *distribution_ret);
/**
 * Creates a roulette distribution: sampling yields an integer index in
 * [0, num_areas), index i being drawn with a probability proportional to
 * areas[i].
 * @param num_areas        number of entries in areas; must be non-zero
 * @param areas            non-negative weights, not all zero; they are
 *                         normalized by their sum and copied into the new
 *                         object, so the caller keeps ownership of the array
 * @param distribution_ret receives the new distribution on success
 * @return CCS_SUCCESS on success, -CCS_INVALID_VALUE for a NULL pointer, a
 *         zero or too large num_areas, a negative area or an unusable sum,
 *         -CCS_ENOMEM when the allocation fails
 */
extern ccs_error_t
ccs_create_roulette_distribution(size_t num_areas,
ccs_float_t *areas,
ccs_distribution_t *distribution_ret);
// Accessors
extern ccs_error_t
ccs_distribution_get_type(ccs_distribution_t distribution,
......@@ -101,8 +107,17 @@ ccs_normal_distribution_get_parameters(ccs_distribution_t distribution,
extern ccs_error_t
ccs_uniform_distribution_get_parameters(ccs_distribution_t distribution,
ccs_numeric_t *lower,
ccs_numeric_t *upper);
ccs_numeric_t *lower_ret,
ccs_numeric_t *upper_ret);
/**
 * Retrieves the number of areas of a roulette distribution.
 * @param distribution  a distribution of type CCS_ROULETTE
 * @param num_areas_ret receives the number of areas
 * @return CCS_SUCCESS on success, -CCS_INVALID_OBJECT if distribution is not
 *         a roulette distribution, -CCS_INVALID_VALUE if num_areas_ret is NULL
 */
extern ccs_error_t
ccs_roulette_distribution_get_num_areas(ccs_distribution_t distribution,
size_t *num_areas_ret);
/**
 * Retrieves the normalized areas (they sum to 1 up to rounding) of a roulette
 * distribution.
 * @param distribution a distribution of type CCS_ROULETTE
 * @param num_areas    size of the areas array; must be exactly the number of
 *                     areas of the distribution
 * @param areas        receives num_areas normalized areas
 * @return CCS_SUCCESS on success, -CCS_INVALID_OBJECT if distribution is not
 *         a roulette distribution, -CCS_INVALID_VALUE if areas is NULL or
 *         num_areas does not match
 */
extern ccs_error_t
ccs_roulette_distribution_get_areas(ccs_distribution_t distribution,
size_t num_areas,
ccs_float_t *areas);
// Sampling Interface
extern ccs_error_t
......
......@@ -14,6 +14,7 @@ libcconfigspace_la_SOURCES = \
distribution_internal.h \
distribution_uniform.c \
distribution_normal.c \
distribution_roulette.c \
hyperparameter.c \
hyperparameter_internal.h \
hyperparameter_numerical.c
......
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <math.h>
#include "cconfigspace_internal.h"
#include "distribution_internal.h"
/*
 * Private payload of a roulette distribution.
 *
 * common_data must stay the first member: the public accessors inspect the
 * payload through a _ccs_distribution_common_data_t pointer.
 */
typedef struct _ccs_distribution_roulette_data_s {
	/* Fields shared by every distribution kind (type, data type, scale,
	 * quantization). */
	_ccs_distribution_common_data_t common_data;
	/* Number of selectable indices. */
	ccs_int_t                       num_areas;
	/* Cumulative normalized areas, num_areas + 1 entries running from 0.0
	 * to 1.0; index i is selected when areas[i] <= rnd < areas[i + 1]. */
	ccs_float_t                    *areas;
} _ccs_distribution_roulette_data_t;
/*
 * Deletion hook registered in the roulette ops table. It is a no-op: the
 * object, its payload and the cumulative table are carved out of a single
 * calloc() block by ccs_create_roulette_distribution(), so there is no
 * separately owned memory to release here.
 * NOTE(review): presumably the generic object release code frees that block
 * itself - confirm.
 */
static ccs_error_t
_ccs_distribution_del(ccs_object_t o) {
(void)o;
return CCS_SUCCESS;
}
/* Forward declarations: the ops table below needs the addresses of both
 * callbacks before their definitions. */
static ccs_error_t
_ccs_distribution_roulette_get_bounds(_ccs_distribution_data_t *data,
ccs_interval_t *interval_ret);
static ccs_error_t
_ccs_distribution_roulette_samples(_ccs_distribution_data_t *data,
ccs_rng_t rng,
size_t num_values,
ccs_numeric_t *values);
/* Operations of the roulette distribution. The initializer is positional:
 * the nested object ops holding the deletion hook, then the sampling
 * callback, then the bounds callback. */
static _ccs_distribution_ops_t _ccs_distribution_roulette_ops = {
{ &_ccs_distribution_del },
&_ccs_distribution_roulette_samples,
&_ccs_distribution_roulette_get_bounds
};
/*
 * Reports the range of values a roulette distribution can produce: the
 * integer interval [0, num_areas), lower bound included, upper bound
 * excluded.
 *
 * data:         payload of a roulette distribution
 * interval_ret: interval to fill in
 * Always returns CCS_SUCCESS.
 */
static ccs_error_t
_ccs_distribution_roulette_get_bounds(_ccs_distribution_data_t *data,
                                      ccs_interval_t           *interval_ret) {
	_ccs_distribution_roulette_data_t *roulette =
		(_ccs_distribution_roulette_data_t *)data;

	interval_ret->type           = CCS_NUM_INTEGER;
	interval_ret->lower_included = CCS_TRUE;
	interval_ret->upper_included = CCS_FALSE;
	interval_ret->lower          = CCSI(INT64_C(0));
	interval_ret->upper          = CCSI(roulette->num_areas);
	return CCS_SUCCESS;
}
/*
 * Draws num_values indices from a roulette distribution.
 *
 * For each sample a uniform number rnd in [0, 1) is drawn and the slot k such
 * that cdf[k] <= rnd < cdf[k + 1] is located by bisection over the cumulative
 * table. The first probe is not the midpoint but (num_areas - 1) * rnd, which
 * is the exact answer when all areas are equal.
 *
 * data:       payload of a roulette distribution
 * rng:        random number generator wrapping a GSL generator
 * num_values: number of samples to produce
 * values:     receives num_values integer samples (written to the .i member)
 * Returns CCS_SUCCESS, or the error reported by ccs_rng_get_gsl_rng().
 */
static ccs_error_t
_ccs_distribution_roulette_samples(_ccs_distribution_data_t *data,
                                   ccs_rng_t                 rng,
                                   size_t                    num_values,
                                   ccs_numeric_t            *values) {
	_ccs_distribution_roulette_data_t *roulette =
		(_ccs_distribution_roulette_data_t *)data;
	gsl_rng *grng;
	ccs_error_t err = ccs_rng_get_gsl_rng(rng, &grng);
	if (err)
		return err;

	for (size_t n = 0; n < num_values; n++) {
		ccs_float_t rnd = gsl_rng_uniform(grng);
		ccs_int_t   lo  = 0;
		ccs_int_t   hi  = roulette->num_areas - 1;
		ccs_int_t   pos = hi * rnd;

		for (;;) {
			if (rnd < roulette->areas[pos]) {
				/* The slot lies strictly below pos. */
				hi = pos - 1;
			} else if (rnd >= roulette->areas[pos + 1]) {
				/* The slot lies strictly above pos. */
				lo = pos + 1;
			} else {
				break;
			}
			pos = (lo + hi) / 2;
		}
		values[n].i = pos;
	}
	return CCS_SUCCESS;
}
/*
 * Creates a roulette distribution: sampling yields an integer index in
 * [0, num_areas), index i being drawn with a probability proportional to
 * areas[i].
 *
 * num_areas:        number of entries in areas; must be non-zero and fit in
 *                   a ccs_int_t
 * areas:            non-negative, finite weights, not all zero; they are
 *                   normalized by their sum and copied, the caller keeps
 *                   ownership of the array
 * distribution_ret: receives the new distribution on success, untouched on
 *                   failure
 *
 * Returns CCS_SUCCESS on success, -CCS_INVALID_VALUE for a NULL pointer, an
 * invalid num_areas, a negative or NaN area, or a sum of areas that is zero,
 * infinite or too small to be inverted, and -CCS_ENOMEM when the object
 * cannot be allocated.
 *
 * The object header, the roulette payload and the cumulative table of
 * num_areas + 1 entries live in one single allocation.
 */
ccs_error_t
ccs_create_roulette_distribution(size_t              num_areas,
                                 ccs_float_t        *areas,
                                 ccs_distribution_t *distribution_ret) {
	if (!distribution_ret || !areas || !num_areas || num_areas > INT64_MAX)
		return -CCS_INVALID_VALUE;

	ccs_float_t sum = 0.0;
	for (size_t i = 0; i < num_areas; i++) {
		if (areas[i] < 0.0)
			return -CCS_INVALID_VALUE;
		sum += areas[i];
	}
	/* A NaN or infinite sum (an infinite area, or an overflowing total)
	 * must be rejected here: 1.0 / inf is a perfectly finite 0.0, so the
	 * check on inv_sum alone would let it through and the cumulative table
	 * would end up holding NaNs or giving all the weight to the last
	 * index. */
	if (!isfinite(sum) || sum == 0.0)
		return -CCS_INVALID_VALUE;
	/* Still needed: a subnormal sum makes the inverse overflow. */
	ccs_float_t inv_sum = 1.0 / sum;
	if (!isfinite(inv_sum))
		return -CCS_INVALID_VALUE;

	/* Make sure the size computation below cannot wrap around. */
	const size_t header_size = sizeof(struct _ccs_distribution_s) +
	                           sizeof(_ccs_distribution_roulette_data_t);
	if (num_areas >= (SIZE_MAX - header_size) / sizeof(ccs_float_t))
		return -CCS_ENOMEM;

	uintptr_t mem = (uintptr_t)calloc(1, header_size + sizeof(ccs_float_t) * (num_areas + 1));
	if (!mem)
		return -CCS_ENOMEM;

	ccs_distribution_t distrib = (ccs_distribution_t)mem;
	_ccs_object_init(&(distrib->obj), CCS_DISTRIBUTION, (_ccs_object_ops_t *)&_ccs_distribution_roulette_ops);
	_ccs_distribution_roulette_data_t *distrib_data =
		(_ccs_distribution_roulette_data_t *)(mem + sizeof(struct _ccs_distribution_s));
	distrib_data->common_data.type         = CCS_ROULETTE;
	distrib_data->common_data.data_type    = CCS_NUM_INTEGER;
	distrib_data->common_data.scale_type   = CCS_LINEAR;
	distrib_data->common_data.quantization = CCSI(0);
	distrib_data->num_areas                = num_areas;
	distrib_data->areas                    = (ccs_float_t *)(mem + header_size);

	/* Cumulative normalized areas: areas[0] = 0 and areas[i] is the sum of
	 * the first i normalized areas. */
	distrib_data->areas[0] = 0.0;
	for (size_t i = 1; i <= num_areas; i++) {
		distrib_data->areas[i] = distrib_data->areas[i-1] + areas[i-1] * inv_sum;
	}
	/* Pin the last entry to exactly 1.0 so that every rnd in [0, 1) falls
	 * in some slot, then make sure rounding did not push the previous
	 * entry past it, which would break the monotonicity the sampler relies
	 * on. */
	distrib_data->areas[num_areas] = 1.0;
	if (distrib_data->areas[num_areas] < distrib_data->areas[num_areas-1]) {
		free((void *)mem);
		return -CCS_INVALID_VALUE;
	}

	distrib->data = (_ccs_distribution_data_t *)distrib_data;
	*distribution_ret = distrib;
	return CCS_SUCCESS;
}
/*
 * Retrieves the number of areas of a roulette distribution.
 *
 * distribution:  a distribution object of type CCS_ROULETTE
 * num_areas_ret: receives the number of areas
 *
 * Returns CCS_SUCCESS on success, -CCS_INVALID_OBJECT when distribution is
 * NULL, is not a distribution, has no payload or is not a roulette
 * distribution, and -CCS_INVALID_VALUE when num_areas_ret is NULL.
 */
extern ccs_error_t
ccs_roulette_distribution_get_num_areas(ccs_distribution_t  distribution,
                                        size_t             *num_areas_ret) {
	if (!distribution ||
	    distribution->obj.type != CCS_DISTRIBUTION ||
	    !distribution->data ||
	    ((_ccs_distribution_common_data_t*)distribution->data)->type != CCS_ROULETTE)
		return -CCS_INVALID_OBJECT;
	if (!num_areas_ret)
		return -CCS_INVALID_VALUE;

	*num_areas_ret = ((_ccs_distribution_roulette_data_t *)distribution->data)->num_areas;
	return CCS_SUCCESS;
}
/*
 * Retrieves the normalized areas of a roulette distribution, rebuilt as the
 * differences between consecutive entries of the stored cumulative table.
 *
 * distribution: a distribution object of type CCS_ROULETTE
 * num_areas:    size of the areas array; must be exactly the number of areas
 *               of the distribution
 * areas:        receives num_areas normalized areas
 *
 * Returns CCS_SUCCESS on success, -CCS_INVALID_OBJECT when distribution is
 * NULL, is not a distribution, has no payload or is not a roulette
 * distribution, and -CCS_INVALID_VALUE when areas is NULL or num_areas does
 * not match.
 */
extern ccs_error_t
ccs_roulette_distribution_get_areas(ccs_distribution_t  distribution,
                                    size_t              num_areas,
                                    ccs_float_t        *areas) {
	if (!distribution ||
	    distribution->obj.type != CCS_DISTRIBUTION ||
	    !distribution->data ||
	    ((_ccs_distribution_common_data_t*)distribution->data)->type != CCS_ROULETTE)
		return -CCS_INVALID_OBJECT;
	if (!areas)
		return -CCS_INVALID_VALUE;

	_ccs_distribution_roulette_data_t *roulette =
		(_ccs_distribution_roulette_data_t *)distribution->data;
	if (roulette->num_areas != (ccs_int_t)num_areas)
		return -CCS_INVALID_VALUE;

	ccs_float_t *cumulative = roulette->areas;
	for (size_t k = 0; k < num_areas; k++)
		areas[k] = cumulative[k + 1] - cumulative[k];
	return CCS_SUCCESS;
}
......@@ -188,21 +188,21 @@ ccs_create_uniform_distribution(ccs_numeric_type_t data_type,
ccs_error_t
ccs_uniform_distribution_get_parameters(ccs_distribution_t distribution,
ccs_numeric_t *lower,
ccs_numeric_t *upper) {
ccs_numeric_t *lower_ret,
ccs_numeric_t *upper_ret) {
if (!distribution || distribution->obj.type != CCS_DISTRIBUTION)
return -CCS_INVALID_OBJECT;
if (!distribution->data || ((_ccs_distribution_common_data_t*)distribution->data)->type != CCS_UNIFORM)
return -CCS_INVALID_OBJECT;
if (!lower && !upper)
if (!lower_ret && !upper_ret)
return -CCS_INVALID_VALUE;
_ccs_distribution_uniform_data_t * data = (_ccs_distribution_uniform_data_t *)distribution->data;
if (lower) {
*lower = data->lower;
if (lower_ret) {
*lower_ret = data->lower;
}
if (upper) {
*upper = data->upper;
if (upper_ret) {
*upper_ret = data->upper;
}
return CCS_SUCCESS;
}
......
......@@ -8,6 +8,7 @@ RNG_TESTS = \
test_interval \
test_uniform_distribution \
test_normal_distribution \
test_roulette_distribution \
test_numerical_hyperparameter
# unit tests
......
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <cconfigspace.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_cdf.h>
void test_create_roulette_distribution() {
ccs_distribution_t distrib = NULL;
ccs_error_t err = CCS_SUCCESS;
int32_t refcount;
ccs_object_type_t otype;
ccs_distribution_type_t dtype;
ccs_scale_type_t stype;
ccs_numeric_type_t data_type;
ccs_numeric_t quantization;
ccs_interval_t interval;
const size_t num_areas = 4;
ccs_float_t areas[num_areas];
size_t num_areas_ret;
ccs_float_t areas_ret[num_areas];
const ccs_float_t epsilon = 1e-15;
for(size_t i = 0; i < num_areas; i++) {
areas[i] = (double)(i+1);
}
err = ccs_create_roulette_distribution(
num_areas,
areas,
&distrib);
assert( err == CCS_SUCCESS );
err = ccs_object_get_type(distrib, &otype);
assert( err == CCS_SUCCESS );
assert( otype == CCS_DISTRIBUTION );
err = ccs_distribution_get_type(distrib, &dtype);
assert( err == CCS_SUCCESS );
assert( dtype == CCS_ROULETTE );
err = ccs_distribution_get_data_type(distrib, &data_type);
assert( err == CCS_SUCCESS );
assert( data_type == CCS_NUM_INTEGER );
err = ccs_distribution_get_scale_type(distrib, &stype);
assert( err == CCS_SUCCESS );
assert( stype == CCS_LINEAR );
err = ccs_distribution_get_quantization(distrib, &quantization);
assert( err == CCS_SUCCESS );
assert( quantization.i == 0 );
err = ccs_distribution_get_bounds(distrib, &interval);
assert( err == CCS_SUCCESS );
assert( interval.type == CCS_NUM_INTEGER );
assert( interval.lower.i == 0 );
assert( interval.lower_included == CCS_TRUE );
assert( interval.upper.i == 4 );
assert( interval.upper_included == CCS_FALSE );
err = ccs_roulette_distribution_get_num_areas(distrib, &num_areas_ret);
assert( err == CCS_SUCCESS );
assert( num_areas_ret == num_areas );
err = ccs_roulette_distribution_get_areas(distrib, num_areas_ret, areas_ret);
assert( err == CCS_SUCCESS );
ccs_float_t inv_sum = 2.0 / (num_areas * (num_areas + 1));
for (size_t i = 0; i < num_areas; i++) {
assert( areas_ret[i] <= areas[i] * inv_sum + epsilon &&
areas_ret[i] >= areas[i] * inv_sum - epsilon );
}
err = ccs_object_get_refcount(distrib, &refcount);
assert( err == CCS_SUCCESS );
assert( refcount == 1 );
err = ccs_release_object(distrib);
assert( err == CCS_SUCCESS );
}
/*
 * Checks that ccs_create_roulette_distribution() rejects invalid arguments
 * with -CCS_INVALID_VALUE: a zero or oversized number of areas, NULL
 * pointers and a negative area. The output handle must be left untouched by
 * every failed call.
 */
void test_create_roulette_distribution_errors(void) {
	/* An enumeration constant keeps areas an ordinary array rather than a
	 * VLA (optional since C11). */
	enum { NUM_AREAS = 4 };
	const size_t       num_areas = NUM_AREAS;
	ccs_distribution_t distrib   = NULL;
	ccs_error_t        err       = CCS_SUCCESS;
	ccs_float_t        areas[NUM_AREAS];

	for (size_t i = 0; i < num_areas; i++) {
		areas[i] = (double)(i+1);
	}

	/* No area at all. */
	err = ccs_create_roulette_distribution(
		0,
		areas,
		&distrib);
	assert( err == -CCS_INVALID_VALUE );

	/* More areas than the library accepts; rejected before areas is read. */
	err = ccs_create_roulette_distribution(
		SIZE_MAX,
		areas,
		&distrib);
	assert( err == -CCS_INVALID_VALUE );

	err = ccs_create_roulette_distribution(
		num_areas,
		NULL,
		&distrib);
	assert( err == -CCS_INVALID_VALUE );

	err = ccs_create_roulette_distribution(
		num_areas,
		areas,
		NULL);
	assert( err == -CCS_INVALID_VALUE );

	/* Negative area. */
	areas[1] = -2;
	err = ccs_create_roulette_distribution(
		num_areas,
		areas,
		&distrib);
	assert( err == -CCS_INVALID_VALUE );

	/* None of the failed calls may have produced an object. */
	assert( distrib == NULL );
}
void test_roulette_distribution() {
ccs_distribution_t distrib = NULL;
ccs_rng_t rng = NULL;
ccs_error_t err = CCS_SUCCESS;
const size_t num_samples = 10000;
ccs_numeric_t samples[num_samples];
const size_t num_areas = 4;
ccs_float_t areas[num_areas];
int counts[num_areas];
for(size_t i = 0; i < num_areas; i++) {
areas[i] = (double)(i+1);
counts[i] = 0;
}
err = ccs_rng_create(&rng);
assert( err == CCS_SUCCESS );
err = ccs_create_roulette_distribution(
num_areas,
areas,
&distrib);
assert( err == CCS_SUCCESS );
err = ccs_distribution_samples(distrib, rng, num_samples, samples);
assert( err == CCS_SUCCESS );
ccs_float_t sum = 0.0;
ccs_float_t inv_sum = 0.0;
for(size_t i = 0; i < num_areas; i++) {
sum += areas[i];
}
inv_sum = 1.0 / sum;
for(size_t i = 0; i < num_samples; i++) {
assert( samples[i].i >=0 && samples[i].i < 4 );
counts[samples[i].i]++;
}
for(size_t i = 0; i < num_areas; i++) {
ccs_float_t target = num_samples * areas[i] * inv_sum;
assert( counts[i] >= target * 0.95 && counts[i] <= target * 1.05 );
}
err = ccs_release_object(distrib);
assert( err == CCS_SUCCESS );
err = ccs_release_object(rng);
assert( err == CCS_SUCCESS );
}
void test_roulette_distribution_zero() {
ccs_distribution_t distrib = NULL;
ccs_rng_t rng = NULL;
ccs_error_t err = CCS_SUCCESS;
const size_t num_samples = 8000;
ccs_numeric_t samples[num_samples];
const size_t num_areas = 4;
ccs_float_t areas[num_areas];
int counts[num_areas];
for(size_t i = 0; i < num_areas; i++) {
areas[i] = (double)(i+1);
counts[i] = 0;
}
areas[1] = 0.0;
err = ccs_rng_create(&rng);
assert( err == CCS_SUCCESS );
err = ccs_create_roulette_distribution(
num_areas,
areas,
&distrib);
assert( err == CCS_SUCCESS );
err = ccs_distribution_samples(distrib, rng, num_samples, samples);
assert( err == CCS_SUCCESS );
ccs_float_t sum = 0.0;
ccs_float_t inv_sum = 0.0;
for(size_t i = 0; i < num_areas; i++) {
sum += areas[i];
}
inv_sum = 1.0 / sum;
for(size_t i = 0; i < num_samples; i++) {
assert( samples[i].i >=0 && samples[i].i < 4 );
counts[samples[i].i]++;
}
for(size_t i = 0; i < num_areas; i++) {
ccs_float_t target = num_samples * areas[i] * inv_sum;
assert( counts[i] >= target * 0.95 && counts[i] <= target * 1.05 );
}
err = ccs_release_object(distrib);
assert( err == CCS_SUCCESS );
err = ccs_release_object(rng);
assert( err == CCS_SUCCESS );
}
/*
 * Test driver: initializes the library, then runs every roulette
 * distribution test in turn. The tests report failures through assert(), so
 * reaching the final return means they all passed. The command line
 * arguments are not used.
 */
int main(int argc, char *argv[]) {
ccs_init();
test_create_roulette_distribution();
test_create_roulette_distribution_errors();
test_roulette_distribution();
test_roulette_distribution_zero();
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment