10 #ifndef __BITSTRING_H__ 11 #define __BITSTRING_H__ 54 word_len = width*nbits;
57 for (int32_t j=0; j<word_len; j++)
58 mask=(mask<<1) | (uint64_t) 1;
59 mask<<=
sizeof(uint64_t)*8-word_len;
62 for (int32_t j=0; j<nbits; j++)
63 mask=(mask<<1) | (uint64_t) 1;
89 uint64_t stream_len=len/
sizeof(uint64_t)+1;
90 string=SG_MALLOC(uint64_t, stream_len);
96 uint64_t nfit=8*
sizeof(w)/nbits;
97 for (uint64_t i=0; i<len; i++)
99 w= (w << nbits) | alphabet->
remap_to_bin((uint8_t) str[j]);
101 if (i % nfit == nfit-1)
135 if (!id_len ||
id[0]!=
'>')
136 SG_SERROR(
"No fasta hunks (lines starting with '>') found\n")
143 int32_t spanned_lines=0;
154 SG_SERROR(
"Error reading fasta entry in line %d len=%ld", spanned_lines+1, len)
157 SG_SERROR(
"Multiple fasta hunks (lines starting with '>') are not supported!\n")
166 uint64_t nfit=8*
sizeof(w)/nbits;
168 len = fasta_len-spanned_lines;
169 uint64_t stream_len=len/(nfit)+1;
170 string=SG_MALLOC(uint64_t, stream_len);
176 for (int32_t j=0; j<fasta_len; j++, k++)
184 w= (w << nbits) | alphabet->
remap_to_bin((uint8_t) fasta[j]);
186 if (k % nfit == nfit-1)
195 string[idx]=w<<(nbits*(nfit - k%nfit));
224 uint64_t stream_len=len/
sizeof(uint64_t)+1;
225 string=SG_MALLOC(uint64_t, stream_len);
268 int32_t ws=8*
sizeof(uint64_t);
269 uint64_t i=bitindex/ws;
270 int32_t j=bitindex % ws;
271 int32_t missing=word_len-(ws-j);
274 uint64_t res= ((
string[i] << j) & mask ) >> (ws-word_len);
277 res|= (
string[i+1] >> (ws-missing) );
294 int32_t ws=8*
sizeof(uint64_t);
295 uint64_t i=bitindex/ws;
296 int32_t j=bitindex % ws;
297 int32_t missing=word_len-(ws-j);
299 uint64_t sl = j-word_len;
317 string[i] = (
string[i] & (~ml) ) | ( wl & ml);
323 string[i+1] = (
string[i+1] & (~mr) ) | ( wr & mr);
332 virtual const char*
get_name()
const {
return "BitString"; }
346 uint64_t single_mask;
349 #endif //__BITSTRING_H__
A string class embedding a string in a compact bit representation.
EAlphabet
Alphabet of charfeatures/observations
virtual const char * get_name() const
char * get_line(uint64_t &len, uint64_t &offs)
int32_t get_num_bits() const
The class Alphabet implements an alphabet and alphabet utility functions.
uint8_t remap_to_bin(uint8_t c)
void set_binary_word(uint16_t word, uint64_t index)
uint64_t get_length() const
Class SGObject is the base class of all shogun objects.
void load_fasta_file(const char *fname, bool ignore_invalid=false)
static void fill_vector(T *vec, int32_t len, T value)
void set_string(uint64_t *str, uint64_t len)
All classes and functions are contained in the shogun namespace.
uint64_t operator[](uint64_t index) const
void obtain_from_char(char *str, uint64_t len)
void create(uint64_t len)
#define SG_UNSTABLE(func,...)
CBitString(EAlphabet alpha, int32_t width=1)