Commit 2c47b734 authored by Hal Finkel's avatar Hal Finkel
Browse files

more work on adding SZ (latest version)

parent 6461f57b
......@@ -63,11 +63,11 @@ BLOSC_CPPFLAGS := \
BASE_CPPFLAGS := $(BLOSC_CPPFLAGS) -I. -D__STDC_CONSTANT_MACROS
FEDIR = frontend
FE_CFLAGS := -g -fPIC -O3 -fopenmp
FE_CFLAGS := -g -fPIC -O3 -fopenmp -std=gnu99
FE_CPPFLAGS := $(BASE_CPPFLAGS) -Ithirdparty/sqlite -DGENERICIO_NO_MPI
MPIDIR = mpi
MPI_CFLAGS := -g -O3 -fopenmp
MPI_CFLAGS := -g -O3 -fopenmp -std=gnu99
MPI_CPPFLAGS := $(BASE_CPPFLAGS)
$(FEDIR):
......@@ -169,7 +169,11 @@ BLOSC_O := \
thirdparty/SZ/sz/src/sz_float_pwr.o \
thirdparty/SZ/sz/src/sz_double_pwr.o \
thirdparty/SZ/sz/src/szd_float_pwr.o \
thirdparty/SZ/sz/src/szd_double_pwr.o
thirdparty/SZ/sz/src/szd_double_pwr.o \
thirdparty/SZ/sz/src/sz_double_ts.o \
thirdparty/SZ/sz/src/sz_float_ts.o \
thirdparty/SZ/sz/src/szd_double_ts.o \
thirdparty/SZ/sz/src/szd_float_ts.o
FE_BLOSC_O := $(addprefix $(FEDIR)/,$(BLOSC_O))
......
......@@ -515,6 +515,11 @@ void GenericIO::write() {
blosc_initialized = true;
}
if (!sz_initialized) {
SZ_Init(NULL);
sz_initialized = true;
}
#ifdef _OPENMP
blosc_set_nthreads(omp_get_max_threads());
}
......
Copyright © 2016 , UChicago Argonne, LLC
All Rights Reserved
[SZ, Version 1.3]
Sheng Di
Dingwen Tao
Franck Cappello
Argonne National Laboratory
OPEN SOURCE LICENSE (license number: SF-16-105)
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Software changes, modifications, or derivative works, should be noted with comments and the author and organization's name.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the names of UChicago Argonne, LLC or the Department of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
4. The software and the end-user documentation included with the redistribution, if any, must include the following acknowledgment:
"This product includes software produced by UChicago Argonne, LLC under Contract No. DE-AC02-06CH11357 with the Department of Energy."
******************************************************************************************************
DISCLAIMER
THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND.
NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
***************************************************************************************************
Contact: Sheng Di (sdi1@anl.gov), Franck Cappello(cappello@mcs.anl.gov)
/**
* @file ByteToolkit.h
* @author Sheng Di
* @date July, 2017
* @brief Header file for the ByteToolkit.c.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _ByteToolkit_H
#define _ByteToolkit_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
//ByteToolkit.c
unsigned short bytesToUInt16_bigEndian(unsigned char* bytes);
unsigned int bytesToUInt32_bigEndian(unsigned char* bytes);
unsigned long bytesToUInt64_bigEndian(unsigned char* b);
short bytesToInt16_bigEndian(unsigned char* bytes);
int bytesToInt32_bigEndian(unsigned char* bytes);
long bytesToInt64_bigEndian(unsigned char* b);
int bytesToInt_bigEndian(unsigned char* bytes);
void intToBytes_bigEndian(unsigned char *b, unsigned int num);
void int64ToBytes_bigEndian(unsigned char *b, uint64_t num);
void int32ToBytes_bigEndian(unsigned char *b, uint32_t num);
void int16ToBytes_bigEndian(unsigned char *b, uint16_t num);
long bytesToLong_bigEndian(unsigned char* b);
void longToBytes_bigEndian(unsigned char *b, unsigned long num);
long doubleToOSEndianLong(double value);
int floatToOSEndianInt(float value);
short getExponent_float(float value);
short getPrecisionReqLength_float(float precision);
short getExponent_double(double value);
short getPrecisionReqLength_double(double precision);
unsigned char numberOfLeadingZeros_Int(int i);
unsigned char numberOfLeadingZeros_Long(long i);
unsigned char getLeadingNumbers_Int(int v1, int v2);
unsigned char getLeadingNumbers_Long(long v1, long v2);
short bytesToShort(unsigned char* bytes);
void shortToBytes(unsigned char* b, short value);
int bytesToInt(unsigned char* bytes);
long bytesToLong(unsigned char* bytes);
float bytesToFloat(unsigned char* bytes);
void floatToBytes(unsigned char *b, float num);
double bytesToDouble(unsigned char* bytes);
void doubleToBytes(unsigned char *b, double num);
int extractBytes(unsigned char* byteArray, size_t k, int validLength);
int getMaskRightCode(int m);
int getLeftMovingCode(int kMod8);
int getRightMovingSteps(int kMod8, int resiBitLength);
int getRightMovingCode(int kMod8, int resiBitLength);
short* convertByteDataToShortArray(unsigned char* bytes, size_t byteLength);
unsigned short* convertByteDataToUShortArray(unsigned char* bytes, size_t byteLength);
void convertShortArrayToBytes(short* states, size_t stateLength, unsigned char* bytes);
void convertUShortArrayToBytes(unsigned short* states, size_t stateLength, unsigned char* bytes);
void convertIntArrayToBytes(int* states, size_t stateLength, unsigned char* bytes);
void convertUIntArrayToBytes(unsigned int* states, size_t stateLength, unsigned char* bytes);
void convertLongArrayToBytes(int64_t* states, size_t stateLength, unsigned char* bytes);
void convertULongArrayToBytes(uint64_t* states, size_t stateLength, unsigned char* bytes);
size_t bytesToSize(unsigned char* bytes);
void sizeToBytes(unsigned char* outBytes, size_t size);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _ByteToolkit_H ----- */
/**
* @file CompressElement.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Compress Elements such as DoubleCompressELement.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include <stdint.h>
#ifndef _CompressElement_H
#define _CompressElement_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct DoubleValueCompressElement
{
double data;
long curValue;
unsigned char curBytes[8]; //big_endian
int reqBytesLength;
int resiBitsLength;
} DoubleValueCompressElement;
typedef struct FloatValueCompressElement
{
float data;
int curValue;
unsigned char curBytes[4]; //big_endian
int reqBytesLength;
int resiBitsLength;
} FloatValueCompressElement;
typedef struct LossyCompressionElement
{
int leadingZeroBytes; //0,1,2,or 3
unsigned char integerMidBytes[8];
int integerMidBytes_Length; //they are mid_bits actually
//char curBytes[8];
//int curBytes_Length; //4 for single_precision or 8 for double_precision
int resMidBitsLength;
int residualMidBits;
} LossyCompressionElement;
char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength);
short computeGroupNum_float(float value);
short computeGroupNum_double(double value);
void listAdd_double(double last3CmprsData[3], double value);
void listAdd_float(float last3CmprsData[3], float value);
void listAdd_int(int64_t last3CmprsData[3], int64_t value);
void listAdd_float_group(float *groups, int *flags, char groupNum, float oriValue, float decValue, char* curGroupID);
void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID);
int validPrediction_double(double minErr, double precision);
int validPrediction_float(float minErr, float precision);
double* generateGroupErrBounds(int errorBoundMode, double realPrecision, double pwrErrBound);
int generateGroupMaxIntervalCount(double* groupErrBounds);
void new_LossyCompressionElement(LossyCompressionElement *lce, int leadingNum, unsigned char* intMidBytes,
int intMidBytes_Length, int resiMidBitsLength, int resiBits);
void updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes,
int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce);
void updateLossyCompElement_Float(unsigned char* curBytes, unsigned char* preBytes,
int reqBytesLength, int resiBitsLength, LossyCompressionElement *lce);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _CompressElement_H ----- */
/**
* @file DynamicByteArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Byte Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicByteArray_H
#define _DynamicByteArray_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct DynamicByteArray
{
unsigned char* array;
size_t size;
size_t capacity;
} DynamicByteArray;
void new_DBA(DynamicByteArray **dba, size_t cap);
void convertDBAtoBytes(DynamicByteArray *dba, unsigned char** bytes);
void free_DBA(DynamicByteArray *dba);
unsigned char getDBA_Data(DynamicByteArray *dba, size_t pos);
void addDBA_Data(DynamicByteArray *dba, unsigned char value);
void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicByteArray_H ----- */
/**
* @file DynamicDoubleArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Double Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicDoubleArray_H
#define _DynamicDoubleArray_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct DynamicDoubleArray
{
double* array;
size_t size;
double capacity;
} DynamicDoubleArray;
void new_DDA(DynamicDoubleArray **dda, size_t cap);
void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data);
void free_DDA(DynamicDoubleArray *dda);
double getDDA_Data(DynamicDoubleArray *dda, size_t pos);
void addDDA_Data(DynamicDoubleArray *dda, double value);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicDoubleArray_H ----- */
/**
* @file DynamicFloatArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Float Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicFloatArray_H
#define _DynamicFloatArray_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct DynamicFloatArray
{
float* array;
size_t size;
size_t capacity;
} DynamicFloatArray;
void new_DFA(DynamicFloatArray **dfa, size_t cap);
void convertDFAtoFloats(DynamicFloatArray *dfa, float **data);
void free_DFA(DynamicFloatArray *dfa);
float getDFA_Data(DynamicFloatArray *dfa, size_t pos);
void addDFA_Data(DynamicFloatArray *dfa, float value);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicFloatArray_H ----- */
/**
* @file DynamicIntArray.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for Dynamic Int Array.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _DynamicIntArray_H
#define _DynamicIntArray_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct DynamicIntArray
{
unsigned char* array; //char* (one byte) is enough, don't have to be int*
size_t size;
size_t capacity;
} DynamicIntArray;
void new_DIA(DynamicIntArray **dia, size_t cap);
void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data);
void free_DIA(DynamicIntArray *dia);
int getDIA_Data(DynamicIntArray *dia, size_t pos);
void addDIA_Data(DynamicIntArray *dia, int value);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _DynamicIntArray_H ----- */
/**
* @file Huffman.h
* @author Sheng Di
* @date Aug., 2016
* @brief Header file for the exponential segment constructor.
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _Huffman_H
#define _Huffman_H
#ifdef __cplusplus
extern "C" {
#endif
//Note: when changing the following settings, intvCapacity in sz.h should be changed as well.
//#define allNodes 131072
//#define stateNum 65536
typedef struct node_t {
struct node_t *left, *right;
size_t freq;
char t; //in_node:0; otherwise:1
unsigned int c;
} *node;
typedef struct HuffmanTree {
int stateNum;
int allNodes;
struct node_t* pool;
node *qqq, *qq; //the root node of the HuffmanTree is qq[1]
int n_nodes; //n_nodes is for compression
int qend;
unsigned long **code;
unsigned char *cout;
int n_inode; //n_inode is for decompression
} HuffmanTree;
HuffmanTree* createHuffmanTree(int stateNum);
HuffmanTree* createDefaultHuffmanTree();
node new_node(HuffmanTree *huffmanTree, size_t freq, unsigned int c, node a, node b);
node new_node2(HuffmanTree *huffmanTree, unsigned int c, unsigned char t);
void qinsert(HuffmanTree *huffmanTree, node n);
node qremove(HuffmanTree *huffmanTree);
void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, unsigned long out2);
void init(HuffmanTree *huffmanTree, int *s, size_t length);
void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out, size_t *outSize);
void decode(unsigned char *s, size_t targetLength, node t, int *out);
void pad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
void pad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
void pad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int nodeCount, unsigned char** out);
void unpad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R, unsigned int* C, unsigned char *t, unsigned int i, node root);
void unpad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
void unpad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root);
node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigned char* bytes, int nodeCount);
void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize);
void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out);
void SZ_ReleaseHuffman(HuffmanTree* huffmanTree);
#ifdef __cplusplus
}
#endif
#endif
/**
* @file TightDataPointStorageD.h
* @author Sheng Di
* @date April, 2016
* @brief Header file for the tight data point storage (TDPS).
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _TightDataPointStorageD_H
#define _TightDataPointStorageD_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct TightDataPointStorageD
{
size_t dataSeriesLength;
int allSameData;
double realPrecision;
double medianValue;
char reqLength;
char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression"
int stateNum;
int allNodes;
size_t exactDataNum;
double reservedValue;
unsigned char* rtypeArray;
size_t rtypeArray_size;
unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1)
size_t typeArray_size;
unsigned char* leadNumArray; //its size is exactDataNum/4 (or exactDataNum/4+1)
size_t leadNumArray_size;
unsigned char* exactMidBytes;
size_t exactMidBytes_size;
unsigned char* residualMidBits;
size_t residualMidBits_size;
unsigned int intervals;
unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no)
size_t segment_size;
unsigned char* pwrErrBoundBytes;
int pwrErrBoundBytes_size;
} TightDataPointStorageD;
void new_TightDataPointStorageD_Empty(TightDataPointStorageD **self);
int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **self, unsigned char* flatBytes, size_t flatBytesLength);
void new_TightDataPointStorageD(TightDataPointStorageD **self,
size_t dataSeriesLength, size_t exactDataNum,
int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers....
unsigned char* resiMidBits, size_t resiMidBits_size,
unsigned char resiBitLength,
double realPrecision, double medianValue, char reqLength, unsigned int intervals,
unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo);
void new_TightDataPointStorageD2(TightDataPointStorageD **self,
size_t dataSeriesLength, size_t exactDataNum,
int* type, unsigned char* exactMidBytes, size_t exactMidBytes_size,
unsigned char* leadNumIntArray, //leadNumIntArray contains readable numbers....
unsigned char* resiMidBits, size_t resiMidBits_size,
unsigned char* resiBitLength, size_t resiBitLengthSize,
double realPrecision, double medianValue, char reqLength, unsigned int intervals,
unsigned char* pwrErrBoundBytes, size_t pwrErrBoundBytes_size, unsigned char radExpo);
void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte);
void convertTDPStoBytes_double_reserve(TightDataPointStorageD* tdps, unsigned char* bytes, unsigned char* dsLengthBytes, unsigned char sameByte);
void convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char** bytes, size_t *size);
void convertTDPStoFlatBytes_double_args(TightDataPointStorageD *tdps, unsigned char* bytes, size_t *size);
void free_TightDataPointStorageD(TightDataPointStorageD *tdps);
void free_TightDataPointStorageD2(TightDataPointStorageD *tdps);
#ifdef __cplusplus
}
#endif
#endif /* ----- #ifndef _TightDataPointStorageD_H ----- */
/**
* @file TightDataPointStorageF.h
* @author Sheng Di and Dingwen Tao
* @date Aug, 2016
* @brief Header file for the tight data point storage (TDPS).
* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef _TightDataPointStorageF_H
#define _TightDataPointStorageF_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
typedef struct TightDataPointStorageF
{
size_t dataSeriesLength;
int allSameData;
double realPrecision; //it's used as the pwrErrBoundRatio when errBoundMode==PW_REL
float medianValue;
char reqLength;
char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression"
int stateNum;
int allNodes;
size_t exactDataNum;
float reservedValue;
unsigned char* rtypeArray;
size_t rtypeArray_size;
unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1)
size_t typeArray_size;
unsigned char* leadNumArray; //its size is exactDataNum/4 (or exactDataNum/4+1)
size_t leadNumArray_size;
unsigned char* exactMidBytes;
size_t exactMidBytes_size;
unsigned char* residualMidBits;
size_t residualMidBits_size;
unsigned int intervals; //quantization_intervals
unsigned char isLossless; //a mark to denote whether it's lossless compression (1 is yes, 0 is no)
size_t segment_size;
unsigned char* pwrErrBoundBytes;
int pwrErrBoundBytes_size;
} TightDataPointStorageF;
void new_TightDataPointStorageF_Empty(TightDataPointStorageF **self);
int new_TightDataPointStorageF_fromFlatBytes(TightDataPointStorageF