star-hitran

Load line-by-line data from the HITRAN database
git clone git://git.meso-star.fr/star-hitran.git
Log | Files | Refs | README | LICENSE

commit 15717a07e44b95260b92114d87753a399ace4ee2
parent 25db0cef4cc231bbb80f8288ee97b4e9904f40d5
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Fri, 23 Jan 2026 12:42:24 +0100

No longer compress lines

The compression ratio was actually quite low (less than 2:1), while
access performance was significantly degraded (between 1 and 2 orders of
magnitude), even for linear access, where the naive implementation of
the cache worked well.

Diffstat:
M Makefile | 2 --
M config.mk | 5 ++---
M shtr.pc.in | 4 +---
M src/shtr.h | 4 +---
D src/shtr_cache.c | 143 -------------------------------------------------------------------------------
D src/shtr_cache.h | 54 ------------------------------------------------------
M src/shtr_line_list.c | 384 ++++++++++---------------------------------------------------------------------
M src/shtr_line_list_c.h | 81 ++++++++++---------------------------------------------------------------------
8 files changed, 61 insertions(+), 616 deletions(-)

diff --git a/Makefile b/Makefile @@ -33,7 +33,6 @@ all: library tests utils ################################################################################ SRC = \ src/shtr.c \ - src/shtr_cache.c \ src/shtr_isotope_metadata.c \ src/shtr_line_list.c \ src/shtr_param.c @@ -66,7 +65,6 @@ libshtr.o: $(OBJ) .config: config.mk $(PKG_CONFIG) --atleast-version $(RSYS_VERSION) rsys - $(PKG_CONFIG) --atleast-version $(ZLIB_VERSION) zlib echo "config done" > $@ .SUFFIXES: .c .d .o diff --git a/config.mk b/config.mk @@ -29,10 +29,9 @@ PCFLAGS_STATIC = --static PCFLAGS = $(PCFLAGS_$(LIB_TYPE)) RSYS_VERSION = 0.14 -ZLIB_VERSION = 1 -INCS = $$($(PKG_CONFIG) $(PCFLAGS) --cflags rsys zlib) -LIBS = $$($(PKG_CONFIG) $(PCFLAGS) --libs rsys zlib) +INCS = $$($(PKG_CONFIG) $(PCFLAGS) --cflags rsys) +LIBS = $$($(PKG_CONFIG) $(PCFLAGS) --libs rsys) ################################################################################ # Compilation options diff --git a/shtr.pc.in b/shtr.pc.in @@ -3,10 +3,8 @@ includedir=${prefix}/include libdir=${prefix}/lib Requires: rsys >= @RSYS_VERSION@ -Requieres.private: zlib >= @ZLIB_VERSION@ -Name: Star-STL +Name: Star-HITRAN Description: Star HITRAN library Version: @VERSION@ Libs: -L${libdir} -lshtr -Libs.private: -lm CFlags: -I${includedir} diff --git a/src/shtr.h b/src/shtr.h @@ -44,7 +44,6 @@ #define SHTR_MAX_MOLECULES_COUNT 100 #define SHTR_MAX_ISOTOPES_COUNT 10 -#define SHTR_DEFAULT_COMPRESSION INT_MAX struct shtr_isotope { double abundance; /* in ]0, 1] */ @@ -163,9 +162,8 @@ static const struct shtr_create_args SHTR_CREATE_ARGS_DEFAULT = struct shtr_line_list_load_args { const char* filename; /* Name of the file to load or of the provided stream */ FILE* file; /* Stream from where data are loaded. 
NULL <=> load from file */ - int compression_level; }; -#define SHTR_LINE_LIST_LOAD_ARGS_NULL__ {NULL, NULL, SHTR_DEFAULT_COMPRESSION} +#define SHTR_LINE_LIST_LOAD_ARGS_NULL__ {NULL, NULL} static const struct shtr_line_list_load_args SHTR_LINE_LIST_LOAD_ARGS_NULL = SHTR_LINE_LIST_LOAD_ARGS_NULL__; diff --git a/src/shtr_cache.c b/src/shtr_cache.c @@ -1,143 +0,0 @@ -/* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com) - * Copyright (C) 2025, 2026 Université de Lorraine - * Copyright (C) 2022 Centre National de la Recherche Scientifique - * Copyright (C) 2022 Université Paul Sabatier - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#include "shtr_c.h" -#include "shtr_cache.h" - -#include <rsys/cstr.h> -#include <rsys/mutex.h> -#include <rsys/ref_count.h> - -#include <string.h> /* memcpy */ - -#define CHUNK_ID_NONE SIZE_MAX - -/* Simple, dumb cache structure storing uncompressed lines from the last chunk - * accessed. It should improve linear access performance, but not random access - * performance, which will be disastrous because most accesses will require - * decompressing an entire block of lines, only one of which will be accessed - * before the block is discarded. 
- * - * TODO: implement a more general LRU cache */ -struct cache { - size_t chunk_id; - struct line lines[NLINES_PER_CHUNK]; - - struct mutex* mutex; - struct shtr* shtr; - ref_T ref; -}; - -/******************************************************************************* - * Helper functions - ******************************************************************************/ -static void -release_cache(ref_T* ref) -{ - struct cache* cache = CONTAINER_OF(ref, struct cache, ref); - struct shtr* shtr = NULL; - ASSERT(ref); - shtr = cache->shtr; - if(cache->mutex) mutex_destroy(cache->mutex); - MEM_RM(shtr->allocator, cache); - SHTR(ref_put(shtr)); -} - -/******************************************************************************* - * Local functions - ******************************************************************************/ -res_T -cache_create(struct shtr* shtr, struct cache** out_cache) -{ - struct cache* cache = NULL; - res_T res = RES_OK; - - ASSERT(shtr && out_cache); /* Pre-conditions */ - - cache = MEM_CALLOC(shtr->allocator, 1, sizeof(*cache)); - if(!cache) { res = RES_MEM_ERR; goto error; } - ref_init(&cache->ref); - SHTR(ref_get(shtr)); - cache->shtr = shtr; - cache->chunk_id = CHUNK_ID_NONE; - - cache->mutex = mutex_create(); - if(!cache->mutex) { res = RES_MEM_ERR; goto error; } - -exit: - *out_cache = cache; - return res; -error: - ERROR(shtr, "Error creating line cache -- %s\n", res_to_cstr(res)); - if(cache) { cache_ref_put(cache); cache = NULL; } - goto exit; -} - -void -cache_ref_get(struct cache* cache) -{ - ASSERT(cache); - ref_get(&cache->ref); -} - -void -cache_ref_put(struct cache* cache) -{ - ASSERT(cache); - ref_put(&cache->ref, release_cache); -} - -res_T -cache_get_line - (struct cache* cache, - const size_t line_id, - struct line* line) -{ - const size_t chunk_id = line_id / NLINES_PER_CHUNK; - const size_t chunk_line_id = line_id % NLINES_PER_CHUNK; - res_T res = RES_OK; - - ASSERT(cache && line); - ASSERT(chunk_id != CHUNK_ID_NONE 
&& chunk_line_id < NLINES_PER_CHUNK); - - mutex_lock(cache->mutex); - if(cache->chunk_id != chunk_id) { - res = RES_BAD_ARG; - } else { - *line = cache->lines[chunk_line_id]; - } - mutex_unlock(cache->mutex); - - return res; -} - -void -cache_put_chunk - (struct cache* cache, - const size_t chunk_id, - const struct line lines[NLINES_PER_CHUNK]) -{ - ASSERT(cache && chunk_id != CHUNK_ID_NONE && lines); - - mutex_lock(cache->mutex); - if(cache->chunk_id != chunk_id) { - cache->chunk_id = chunk_id; - memcpy(cache->lines, lines, sizeof(struct line)*NLINES_PER_CHUNK); - } - mutex_unlock(cache->mutex); -} diff --git a/src/shtr_cache.h b/src/shtr_cache.h @@ -1,54 +0,0 @@ -/* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com) - * Copyright (C) 2025, 2026 Université de Lorraine - * Copyright (C) 2022 Centre National de la Recherche Scientifique - * Copyright (C) 2022 Université Paul Sabatier - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ - -#ifndef SHTR_CACHE_H -#define SHTR_CACHE_H - -#include "shtr_line_list_c.h" - -/* Forward declarations */ -struct shtr; -struct cache; - -extern LOCAL_SYM res_T -cache_create - (struct shtr* shtr, - struct cache** cache); - -extern LOCAL_SYM void -cache_ref_get - (struct cache* cache); - -extern LOCAL_SYM void -cache_ref_put - (struct cache* cache); - -/* Returns RES_BAD_ARG if the line is not in the cache */ -extern LOCAL_SYM res_T -cache_get_line - (struct cache* cache, - const size_t line_id, - struct line* line); - -extern LOCAL_SYM void -cache_put_chunk - (struct cache* cache, - const size_t chunk_id, - const struct line lines[NLINES_PER_CHUNK]); - -#endif /* SHTR_CACHE_H */ diff --git a/src/shtr_line_list.c b/src/shtr_line_list.c @@ -17,198 +17,14 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include "shtr_c.h" -#include "shtr_cache.h" #include "shtr_line_list_c.h" #include "shtr_param.h" #include <rsys/cstr.h> #include <rsys/text_reader.h> -/* Maximum size of a compressed block, which in the worst case could correspond - * to the initial block size plus an overhead of 6 bytes, in addition to 5 bytes - * per 16 KB of uncompressed data (see https://www.zlib.net/zlib_tech.html) */ -#define ZCHUNK_MAX_SIZE (CHUNK_SIZE + 6 + (5*(CHUNK_SIZE+16383/*ceil*/)/16384)) - -/******************************************************************************* - * Compression API - ******************************************************************************/ -struct zctx { - struct line* lines; /* Uncompressed Lines */ - size_t nlines; /* Number of uncompressed lines */ - - struct line last_line; /* Last line added. 
Used to check the order of lines */ - - char* zlines; /* Compressed lines */ - - z_stream stream; /* zlib */ - int zlib_is_init; - - struct shtr* shtr; -}; -static const struct zctx ZCTX_NULL = {0}; - -static voidpf -zalloc_func(voidpf opaque, uInt items, uInt size) -{ - ASSERT(opaque); - return MEM_CALLOC((struct mem_allocator*)opaque, items, size); -} - -static void -zfree_func(voidpf opaque, voidpf address) -{ - ASSERT(opaque); - MEM_RM((struct mem_allocator*)opaque, address); -} - -static void -zctx_release(struct zctx* zctx) -{ - ASSERT(zctx); - if(zctx->lines) MEM_RM(zctx->shtr->allocator, zctx->lines); - if(zctx->zlines) MEM_RM(zctx->shtr->allocator, zctx->zlines); - if(zctx->zlib_is_init) deflateEnd(&zctx->stream); - SHTR(ref_put(zctx->shtr)); -} - -static res_T -zctx_init(struct zctx* zctx, struct shtr* shtr, const int level) -{ - int ret = Z_OK; - int z_level = 0; - res_T res = RES_OK; - ASSERT(zctx && shtr); - - *zctx = ZCTX_NULL; - - SHTR(ref_get(shtr)); - zctx->shtr = shtr; - zctx->nlines = 0; - - /* Allocate memory of uncompressed data */ - zctx->lines = MEM_CALLOC - (zctx->shtr->allocator, NLINES_PER_CHUNK, sizeof(*zctx->lines)); - if(!zctx->lines) { res = RES_MEM_ERR; goto error; } - - - /* Define the zlib compression level */ - if(level == SHTR_DEFAULT_COMPRESSION) { - z_level = Z_DEFAULT_COMPRESSION; - } else { - z_level = CLAMP(level, 0, 9); /* zlib compression level in [0,9] */ - } - - if(z_level != 0) { - /* Allocate memory of compressed data */ - zctx->zlines = MEM_ALLOC(zctx->shtr->allocator, ZCHUNK_MAX_SIZE); - if(!zctx->zlines) { res = RES_MEM_ERR; goto error; } - - /* Initialize zlib */ - zctx->stream.zalloc = zalloc_func; - zctx->stream.zfree = zfree_func; - zctx->stream.opaque = zctx->shtr->allocator; - ret = deflateInit(&zctx->stream, z_level); - if(ret != Z_OK) { res = RES_UNKNOWN_ERR; goto error; } - zctx->zlib_is_init = 1; - } - -exit: - return res; -error: - zctx_release(zctx); - goto exit; -} - -static res_T -zctx_deflate(struct 
zctx* zctx, struct shtr_line_list* list) -{ - struct zchunk zchunk = ZCHUNK_NULL__; - char* block = NULL; - size_t sz_total = 0; - size_t nblocks = 0; - size_t n = 0; - int ret = 0; - res_T res = RES_OK; - - ASSERT(zctx && list); - - if(!zctx->nlines) goto exit; /* Nothing to do */ - - if(!zctx->zlib_is_init) { /* Compression is disabled */ - zchunk.size = (uint32_t)(zctx->nlines * sizeof(*zctx->lines)); - - } else { - /* Setup input/output for zlib */ - zctx->stream.next_in = (unsigned char*)zctx->lines; - zctx->stream.avail_in = (uInt)(zctx->nlines * sizeof(*zctx->lines)); - zctx->stream.next_out = (unsigned char*)zctx->zlines; - zctx->stream.avail_out = ZCHUNK_MAX_SIZE; - - /* Compress */ - ret = deflate(&zctx->stream, Z_FINISH); - if(ret != Z_STREAM_END) { res = RES_UNKNOWN_ERR; goto error; } - - CHK(deflateReset(&zctx->stream) == Z_OK); - - /* Calculate the size after compression */ - zchunk.size = ZCHUNK_MAX_SIZE - zctx->stream.avail_out; - } - - /* Calculate the total size already allocated for compressed lines */ - nblocks = darray_charp_size_get(&list->blocks); - sz_total = nblocks * BLOCK_SIZE; - - /* Check that the last memory block has enough space to store the compressed - * chunk */ - n = darray_zchunk_size_get(&list->zchunks); - if(n) { /* Is there a block? */ - struct zchunk* prev_chunk = &darray_zchunk_data_get(&list->zchunks)[n-1]; - size_t sz_in_use = prev_chunk->offset + prev_chunk->size; - size_t sz_remain = sz_total - sz_in_use; - - if(sz_remain > zchunk.size) { - zchunk.offset = sz_in_use; - block = darray_charp_data_get(&list->blocks)[nblocks-1]; - } - } - - /* No memory available. 
Allocate a new block */ - if(!block) { - block = MEM_CALLOC(list->shtr->allocator, 1, BLOCK_SIZE); - if(!block) { res = RES_MEM_ERR; goto error; } - - res = darray_charp_push_back(&list->blocks, &block); - if(res != RES_OK) goto error; - - zchunk.offset = sz_total; - } - - /* Register the chunk */ - res = darray_zchunk_push_back(&list->zchunks, &zchunk); - if(res != RES_OK) goto error; - - if(zctx->zlib_is_init) { - /* Save compressed chunk data */ - memcpy(block + zchunk.offset % BLOCK_SIZE, zctx->zlines, zchunk.size); - } else { - /* Save un-compressed chunk data */ - memcpy(block + zchunk.offset % BLOCK_SIZE, zctx->lines, zchunk.size); - } - - /* Update the number of fully recorded lines, - * i.e., compressed and stored in the list */ - list->nlines += zctx->nlines; - - /* No lines waiting for compression. */ - zctx->nlines = 0; - -exit: - return res; -error: - ERROR(list->shtr, "Error while compressing lines -- %s\n", - zctx->stream.msg ? zctx->stream.msg : res_to_cstr(res)); - goto exit; -} +/* Maximum number of lines that can be stored in a memory block */ +#define NLINES_PER_BLOCK (BLOCK_SIZE/sizeof(struct line)) /******************************************************************************* * Helper functions @@ -242,13 +58,9 @@ create_line_list ref_init(&list->ref); SHTR(ref_get(shtr)); list->shtr = shtr; - darray_zchunk_init(shtr->allocator, &list->zchunks); darray_charp_init(shtr->allocator, &list->blocks); list->info = SHTR_LINE_LIST_INFO_NULL; - res = cache_create(shtr, &list->cache); - if(res != RES_OK) goto error; - exit: *out_list = list; return res; @@ -260,27 +72,15 @@ error: goto exit; } -static res_T -setup_zlib(struct shtr_line_list* list) +static INLINE const struct line* +get_line(const struct shtr_line_list* list, const size_t i) { - int ret = Z_OK; /* zlib */ - res_T res = RES_OK; - ASSERT(list); - - list->z_stream.zalloc = zalloc_func; - list->z_stream.zfree = zfree_func; - list->z_stream.opaque = list->shtr->allocator; - ret = 
inflateInit(&list->z_stream); - if(ret != Z_OK) { res = RES_UNKNOWN_ERR; goto error; } + const size_t iblock = i / NLINES_PER_BLOCK; + const size_t iline = i % NLINES_PER_BLOCK; - list->zlib_is_init = 1; - -exit: - return res; -error: - ERROR(list->shtr, - "Error intializing line decompressor -- %s\n", res_to_cstr(res)); - goto exit; + ASSERT(list && i < list->nlines); + ASSERT(iblock < darray_charp_size_get(&list->blocks)); + return (struct line*)(darray_charp_cdata_get(&list->blocks))[iblock] + iline; } static void @@ -359,22 +159,24 @@ static res_T register_line (struct shtr_line_list* list, const struct txtrdr* txtrdr, - const struct shtr_line* line, - struct zctx* zctx) + const struct shtr_line* line) { struct shtr_line ln = SHTR_LINE_NULL; + struct line* lines = NULL; struct line ln_encoded = LINE_NULL; + size_t iblock = 0; /* Index of the block in which the line is stored */ + size_t iline = 0; /* Index of the line in the block */ res_T res = RES_OK; /* Pre-conditions */ ASSERT(list && txtrdr && line); - ASSERT(zctx && zctx->nlines < NLINES_PER_CHUNK); line_encode(line, &ln_encoded); /* Check if a line has been saved. If so, ensure that the lines are sorted */ - if(darray_zchunk_size_get(&list->zchunks) || zctx->nlines) { - if(zctx->last_line.wavenumber > ln_encoded.wavenumber) { + if(list->nlines) { + const struct line* ln_encoded_prev = get_line(list, list->nlines-1); + if(ln_encoded_prev->wavenumber > ln_encoded.wavenumber) { ERROR(list->shtr, "%s:%lu: lines are not sorted in ascending order wrt their wavenumber.\n", txtrdr_get_name(txtrdr), txtrdr_get_line_num(txtrdr)); @@ -383,16 +185,24 @@ register_line } } - zctx->last_line = ln_encoded; - zctx->lines[zctx->nlines] = ln_encoded; - zctx->nlines += 1; + iblock = list->nlines / NLINES_PER_BLOCK; + iline = list->nlines % NLINES_PER_BLOCK; - /* The chunk is full. 
Compress it */ - if(zctx->nlines == NLINES_PER_CHUNK) { - res = zctx_deflate(zctx, list); + /* Ensure there is sufficient space to store the line */ + if(iline == 0) { + /* There is no more space in the last allocated block. Allocate a new one. */ + char* block = MEM_CALLOC(list->shtr->allocator, 1, BLOCK_SIZE); + if(!block) { res = RES_MEM_ERR; goto error; } + + res = darray_charp_push_back(&list->blocks, &block); if(res != RES_OK) goto error; } + /* Store the encoded line */ + lines = (struct line*)darray_charp_data_get(&list->blocks)[iblock]; + lines[iline] = ln_encoded; + ++list->nlines; + line_decode(&ln_encoded, &ln); ASSERT(ln.molecule_id == line->molecule_id); ASSERT(ln.isotope_id_local == line->isotope_id_local); @@ -528,7 +338,6 @@ load_stream const struct shtr_line_list_load_args* args, struct shtr_line_list** out_lines) { - struct zctx zctx = ZCTX_NULL; struct shtr_line_list* list = NULL; struct txtrdr* txtrdr = NULL; const char* name = NULL; @@ -545,14 +354,6 @@ load_stream res = create_line_list(shtr, &list); if(res != RES_OK) goto error; - if(args->compression_level > 0) { - res = setup_zlib(list); - if(res != RES_OK) goto error; - } - - res = zctx_init(&zctx, shtr, args->compression_level); - if(res != RES_OK) goto error; - res = txtrdr_stream(list->shtr->allocator, stream, name, 0/*No comment char*/, &txtrdr); if(res != RES_OK) { @@ -576,17 +377,12 @@ load_stream res = parse_line(list, txtrdr, &ln); if(res != RES_OK) goto error; - res = register_line(list, txtrdr, &ln, &zctx); + res = register_line(list, txtrdr, &ln); if(res != RES_OK) goto error; } - /* Ensure that remaining lines are compressed and stored */ - res = zctx_deflate(&zctx, list); - if(res != RES_OK) goto error; - exit: if(txtrdr) txtrdr_ref_put(txtrdr); - zctx_release(&zctx); *out_lines = list; return res; error: @@ -597,54 +393,6 @@ error: goto exit; } -static res_T -decompress_zchunk - (struct shtr_line_list* list, - const size_t chunk_id, - struct line lines[NLINES_PER_CHUNK]) -{ 
- const struct zchunk* zchunk = NULL; - char* block = NULL; - size_t block_id = 0; - size_t block_offset = 0; - int ret = Z_OK; /* zlib */ - res_T res = RES_OK; - - ASSERT(list && lines && chunk_id < darray_zchunk_size_get(&list->zchunks)); - - zchunk = darray_zchunk_cdata_get(&list->zchunks) + chunk_id; - block_id = zchunk->offset / BLOCK_SIZE; - block_offset = zchunk->offset % BLOCK_SIZE; - - block = darray_charp_cdata_get(&list->blocks)[block_id]; - - if(!list->zlib_is_init) { - /* Data are not compressed */ - memcpy(lines, block+block_offset, zchunk->size); - - } else { - list->z_stream.next_in = (unsigned char*)(block + block_offset); - list->z_stream.avail_in = (uInt)zchunk->size; - list->z_stream.next_out = (unsigned char*)lines; - list->z_stream.avail_out = (uInt)(sizeof(struct line)*NLINES_PER_CHUNK); - ret = inflate(&list->z_stream, Z_FINISH); - if(ret != Z_STREAM_END) { - ASSERT(list->z_stream.msg); - ERROR(list->shtr, "Error decompressing the chunk of lines -- %s\n", - list->z_stream.msg); - res = RES_UNKNOWN_ERR; - goto error; - } - - CHK(inflateReset(&list->z_stream) == Z_OK); - } - -exit: - return res; -error: - goto exit; -} - static void release_lines(ref_T * ref) { @@ -657,14 +405,10 @@ release_lines(ref_T * ref) shtr = list->shtr; - if(list->cache) cache_ref_put(list->cache); - if(list->zlib_is_init) inflateEnd(&list->z_stream); - n = darray_charp_size_get(&list->blocks); blocks = darray_charp_data_get(&list->blocks); FOR_EACH(i, 0, n) { if(blocks[i]) MEM_RM(shtr->allocator, blocks[i]); } - darray_zchunk_release(&list->zchunks); darray_charp_release(&list->blocks); MEM_RM(shtr->allocator, list); SHTR(ref_put(shtr)); @@ -715,9 +459,9 @@ shtr_line_list_create_from_stream struct shtr_line_list** out_list) { struct shtr_line_list* list = NULL; + size_t nblocks = 0; char** blocks = NULL; - size_t i=0, n=0; - int is_compression_enabled = 0; + size_t i = 0; int version = 0; res_T res = RES_OK; @@ -755,20 +499,18 @@ shtr_line_list_create_from_stream } 
READ(&list->nlines, 1); - READ(&is_compression_enabled, 1); - - /* Memory descriptor of compressed chunks */ - READ(&n, 1); - if((res = darray_zchunk_resize(&list->zchunks, n)) != RES_OK) goto error; - READ(darray_zchunk_data_get(&list->zchunks), n); + nblocks = (list->nlines + (NLINES_PER_BLOCK-1)/*ceil*/) / NLINES_PER_BLOCK; - /* Compressed data stored in memory blocks */ - READ(&n, 1); - if((res = darray_charp_resize(&list->blocks, n)) != RES_OK) goto error; + /* Line stored in memory blocks */ + if((res = darray_charp_resize(&list->blocks, nblocks)) != RES_OK) goto error; blocks = darray_charp_data_get(&list->blocks); - FOR_EACH(i, 0, n) { + FOR_EACH(i, 0, nblocks) { blocks[i] = MEM_ALLOC(list->shtr->allocator, BLOCK_SIZE); - if(!blocks[i]) { res = RES_MEM_ERR; goto error; } + if(!blocks[i]) { + ERROR(shtr, "%s: error allocating memory block\n", FUNC_NAME); + res = RES_MEM_ERR; + goto error; + } READ(blocks[i], BLOCK_SIZE); } @@ -777,11 +519,6 @@ shtr_line_list_create_from_stream #undef READ - if(is_compression_enabled) { - res = setup_zlib(list); - if(res != RES_OK) goto error; - } - exit: if(out_list) *out_list = list; return res; @@ -822,30 +559,12 @@ shtr_line_list_at const size_t i, struct shtr_line* line) { - struct line ln_encoded = LINE_NULL; - res_T res = RES_OK; + const struct line* ln_encoded = NULL; if(!list || !line || i >= list->nlines) return RES_BAD_ARG; - - res = cache_get_line(list->cache, i, &ln_encoded); - - if(res != RES_OK) { /* Cache miss */ - const size_t chunk_id = i / NLINES_PER_CHUNK; - const size_t line_id = i % NLINES_PER_CHUNK; - struct line lines[NLINES_PER_CHUNK]; - - if((res = decompress_zchunk(list, chunk_id, lines)) != RES_OK) goto error; - cache_put_chunk(list->cache, chunk_id, lines); - - ln_encoded = lines[line_id]; - } - - line_decode(&ln_encoded, line); - -exit: - return res; -error: - goto exit; + ln_encoded = get_line(list, i); + line_decode(ln_encoded, line); + return RES_OK; } res_T @@ -874,18 +593,9 @@ 
shtr_line_list_write /* Number of lines in the list */ WRITE(&list->nlines, 1); - /* Is decompression enabled */ - WRITE(&list->zlib_is_init, 1); - - /* Memory descriptor of compressed chunks */ - n = darray_zchunk_size_get(&list->zchunks); - WRITE(&n, 1); - WRITE(darray_zchunk_cdata_get(&list->zchunks), n); - - /* Compressed data stored in memory blocks */ + /* Lines stored in memory blocks. */ blocks = darray_charp_cdata_get(&list->blocks); n = darray_charp_size_get(&list->blocks); - WRITE(&n, 1); FOR_EACH(i, 0, n) { WRITE(blocks[i], BLOCK_SIZE); } /* Informations on line parameters */ diff --git a/src/shtr_line_list_c.h b/src/shtr_line_list_c.h @@ -24,8 +24,6 @@ #include <rsys/dynamic_array.h> #include <rsys/ref_count.h> -#include <zlib.h> - /* * Brief summary of the design * @@ -33,65 +31,18 @@ * reduce the memory footprint. Several line parameters are therefore encoded * with reduced precision (see “struct line”). * - * In addition, lines are first stored in a chunk of CHUNK_SIZE bytes which, - * once filled, is compressed using zlib. The compressed data is then stored in - * a fixed-size memory block. Each chunk can be accessed and decompressed - * separately. The memory descriptors for the compressed chunks, i.e., the data - * defining the location where the compressed data is stored in the memory - * blocks (see “struct zchunk”), are stored in a dynamic array in the order in - * which the lines are loaded. Similarly, the memory blocks that contain the - * compressed data are also listed in a dynamic array in the order in which the - * lines are read. Therefore, the index of a line is sufficient to index the - * chunk in which the line is stored, and finally the memory block in which its - * parameters are compressed. - * - * The use of dynamic arrays allows this access by simple indexing, but at the - * cost of memory overhead due to the dynamic array allocation policy (up to - * twice the required size in the worst case). 
However, the memory space in - * question here is not a major problem, since the zchunk structure and the - * pointer to a memory block take up very little space. And while linked lists - * could have been used instead, they would not only have complicated data - * access, with pointers replacing indexing, but they would also have - * complicated data [de]serialization, precisely because of the use of pointers - * instead of indexes. - * - * Note that the use of memory blocks stored in dynamic arrays rather than a - * simple dynamic array of contiguous bytes is motivated by the issue of - * additional memory overhead associated with the use of dynamic arrays. As said - * above, on the worst case, the memory overhead here is equal to twice the - * number of blocks multiplied by the size of a pointer, compared to twice the - * size required to store all the lines. - * - * A cache is ultimately used to speed up access to lines, which must now be - * decompressed. This cache stores the decompressed blocks in which the most - * recently accessed lines are stored. The implementation of this cache is - * independent of the line storage. However, it must be thread-safe to allow - * simultaneous access. + * Lines are saved in memory blocks of BLOCK_SIZE, stored in a dynamic arrayf. + * Using a dynamic array of memory blocks rather than a simple dynamic array of + * contiguous bytes is motivated by the issue of additional memory overhead + * associated with the use of dynamic arrays for which the overall number of + * entries is unknown. On the worst case, the memory overhead here is equal to + * twice the number of blocks multiplied by the size of a pointer, compared to + * twice the size required to store all the lines. 
*/ /* Size in bytes of a memory block in which compressed data is stored */ #define BLOCK_SIZE (1024*1024) -/* Size in bytes of an uncompressed chunk */ -#define CHUNK_SIZE (64*1024) - -/* Number of lines in a chunk */ -#define NLINES_PER_CHUNK (CHUNK_SIZE/sizeof(struct line)) - -/* Memory descriptor of a compressed chunk */ -struct zchunk { - /* Offset to chunk data. The offset is indicated as if the compressed data - * were stored sequentially. However, the data is stored in memory blocks of - * fixed size. The offset therefore defines both the block index - * (offset/BLOCK_SIZE) and the offset within the block (offset%BLOCK_SIZE) */ - size_t offset; - - /* Size in bytes of the compressed chunk */ - uint32_t size; -}; -#define ZCHUNK_NULL__ {0} -static const struct zchunk ZCHUNK_NULL = ZCHUNK_NULL__; - struct line { double wavenumber; /* Central wavenumber in vacuum [cm^-1] */ double intensity; /* Reference intensity [cm^-1/(molec.cm^2)] */ @@ -122,12 +73,7 @@ static const struct line LINE_NULL = LINE_NULL__; STATIC_ASSERT(sizeof(struct line)==32, Unexpected_sizeof_struct_line); -/* Generate the dynamic array of zchunk */ -#define DARRAY_NAME zchunk -#define DARRAY_DATA struct zchunk -#include <rsys/dynamic_array.h> - -/* Generate he dynamic array of char* */ +/* Generate the dynamic array of char*, the dynamic array of memory blocks */ #define DARRAY_NAME charp #define DARRAY_DATA char* #include <rsys/dynamic_array.h> @@ -140,20 +86,13 @@ static const int SHTR_LINE_LIST_VERSION = 2; struct cache; struct shtr_line_list { - /* Compressed lines sorted in ascending order wrt their wavenumber */ - struct darray_zchunk zchunks; /* Accessor to compressed lines */ + /* Lines sorted in ascending order wrt their wavenumber */ struct darray_charp blocks; /* Memory where compressed lines are stored */ - size_t nlines; /* Number of lines */ + size_t nlines; /* Overall number of lines */ /* Informations on line parameters */ struct shtr_line_list_info info; - struct cache* 
cache; - - /* zlib */ - z_stream z_stream; - int zlib_is_init; - struct shtr* shtr; ref_T ref; };