SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 00011 #ifndef __DATATYPE_H__ 00012 #define __DATATYPE_H__ 00013 00014 #include <shogun/lib/common.h> 00015 //#include <shogun/mathematics/Math.h> 00016 #include <shogun/io/SGIO.h> 00017 00018 #define PT_NOT_GENERIC PT_SGOBJECT 00019 #define PT_LONGEST floatmax_t 00020 00021 namespace shogun 00022 { 00023 00024 //class CMath; 00025 template<class T> class CCache; 00026 00028 typedef int32_t index_t; 00029 00031 template<class T> class SGVector 00032 { 00033 public: 00035 SGVector() : vector(NULL), vlen(0), do_free(false) { } 00036 00038 SGVector(T* v, index_t len, bool free_vec=false) 00039 : vector(v), vlen(len), do_free(free_vec) { } 00040 00042 SGVector(index_t len, bool free_vec=false) 00043 : vlen(len), do_free(free_vec) 00044 { 00045 vector=SG_MALLOC(T, len); 00046 } 00047 00049 SGVector(const SGVector &orig) 00050 : vector(orig.vector), vlen(orig.vlen), do_free(orig.do_free) { } 00051 00053 virtual ~SGVector() 00054 { 00055 } 00056 00061 static SGVector get_vector(SGVector &src, bool own=true) 00062 { 00063 if (!own) 00064 return src; 00065 00066 src.do_free=false; 00067 return SGVector(src.vector, src.vlen); 00068 } 00069 00071 void zero() 00072 { 00073 if (vector && vlen) 00074 set_const(0); 00075 } 00076 00078 void set_const(T const_elem) 00079 { 00080 for (index_t i=0; i<vlen; i++) 00081 vector[i]=const_elem ; 00082 } 00083 00085 void range_fill(T start=0) 00086 { 00087 range_fill_vector(vector, vlen, start); 00088 } 00089 00091 void random(T min_value, T max_value) 00092 { 00093 random_vector(vector, vlen, min_value, max_value); 00094 } 00095 00097 void randperm() 00098 { 00099 randperm(vector, vlen); 00100 } 00101 00103 template <class VT> 00104 static VT* clone_vector(const VT* vec, int32_t len) 00105 { 00106 VT* result = SG_MALLOC(VT, len); 00107 for (int32_t i=0; i<len; i++) 00108 result[i]=vec[i]; 00109 00110 return result; 00111 } 00112 00114 template <class VT> 00115 static void fill_vector(VT* vec, int32_t len, VT value) 00116 { 00117 for (int32_t i=0; i<len; i++) 00118 vec[i]=value; 00119 } 00120 00122 template <class VT> 00123 static void range_fill_vector(VT* vec, int32_t len, VT start=0) 00124 { 00125 for (int32_t i=0; i<len; i++) 00126 vec[i]=i+start; 00127 } 00128 00130 template <class VT> 00131 static void random_vector(VT* vec, int32_t len, VT min_value, VT max_value) 00132 { 00133 //FIXME for (int32_t i=0; i<len; i++) 00134 //FIXME vec[i]=CMath::random(min_value, max_value); 00135 } 00136 00138 template <class VT> 00139 static void randperm(VT* perm, int32_t n) 00140 { 00141 for (int32_t i = 0; i < n; i++) 00142 perm[i] = i; 00143 permute(perm,n); 00144 } 00145 00147 template <class VT> 00148 static void permute(VT* perm, int32_t n) 00149 { 00150 //FIXME for (int32_t i = 0; i < n; i++) 00151 //FIXME CMath::swap(perm[random(0, n - 1)], perm[i]); 00152 } 00153 00159 const T& get_element(index_t index) 00160 { 00161 ASSERT(vector && (index>=0) && (index<vlen)); 00162 return vector[index]; 00163 } 00164 00171 void set_element(const T& p_element, index_t index) 00172 { 00173 ASSERT(vector && (index>=0) && (index<vlen)); 00174 vector[index]=p_element; 00175 } 00176 00182 void resize_vector(int32_t n) 00183 { 00184 vector=SG_REALLOC(T, vector, n); 00185 00186 if (n > vlen) 00187 memset(&vector[vlen], 0, (n-vlen)*sizeof(T)); 00188 vlen=n; 00189 } 00190 00196 inline const T& operator[](index_t index) const 00197 { 00198 return vector[index]; 00199 } 00200 00206 inline T& operator[](index_t index) 00207 { 00208 return vector[index]; 00209 } 00210 00212 virtual void free_vector() 00213 { 00214 if (do_free) 00215 SG_FREE(vector); 00216 00217 vector=NULL; 00218 do_free=false; 00219 vlen=0; 00220 } 00221 00223 virtual void destroy_vector() 00224 { 00225 do_free=true; 00226 free_vector(); 00227 } 00228 00230 void display_size() const 00231 { 00232 SG_SPRINT("SGVector '%p' of size: %d\n", vector, vlen); 00233 } 00234 00236 void display_vector() const 00237 { 00238 display_size(); 00239 for (int32_t i=0; i<vlen; i++) 00240 SG_SPRINT("%10.10g,", (float64_t) vector[i]); 00241 SG_SPRINT("\n"); 00242 } 00243 00244 public: 00246 T* vector; 00248 index_t vlen; 00250 bool do_free; 00251 }; 00252 00253 //template<class T> class SGCachedVector : public SGVector<T> 00254 //{ 00255 // public: 00256 // /** default constructor */ 00257 // SGCachedVector(CCache<T>* c, index_t i) 00258 // : SGVector<T>(), cache(c), idx(i) 00259 // { 00260 // } 00261 // 00262 // /** constructor for setting params */ 00263 // SGCachedVector(CCache<T>* c, index_t i, 00264 // T* v, index_t len, bool free_vec=false) 00265 // : SGVector<T>(v, len, free_vec), cache(c), idx(i) 00266 // { 00267 // } 00268 // 00269 // /** constructor to create new vector in memory */ 00270 // SGCachedVector(CCache<T>* c, index_t i, index_t len, bool free_vec=false) : 00271 // SGVector<T>(len, free_vec), cache(c), idx(i) 00272 // { 00273 // } 00274 // 00275 // /** free vector */ 00276 // virtual void free_vector() 00277 // { 00278 // //clean up cache fixme 00279 // SGVector<T>::free_vector(); 00280 // } 00281 // 00282 // /** destroy vector */ 00283 // virtual void destroy_vector() 00284 // { 00285 // //clean up cache fixme 00286 // SGVector<T>::destroy_vector(); 00287 // if (cache) 00288 // cache->unlock_entry(idx); 00289 // } 00290 // 00291 // public: 00292 // /** idx */ 00293 // index_t idx; 00294 // 00295 // /** cache */ 00296 // CCache<T>* cache; 00297 //}; 00298 00300 template<class T> class SGMatrix 00301 { 00302 public: 00304 SGMatrix() : matrix(NULL), num_rows(0), num_cols(0), do_free(false) { } 00305 00307 SGMatrix(T* m, index_t nrows, index_t ncols, bool free_mat=false) 00308 : matrix(m), num_rows(nrows), num_cols(ncols), do_free(free_mat) { } 00309 00311 SGMatrix(index_t nrows, index_t ncols, bool free_mat=false) 00312 : num_rows(nrows), num_cols(ncols), do_free(free_mat) 00313 { 00314 matrix=SG_MALLOC(T, nrows*ncols); 00315 } 00316 00318 SGMatrix(const SGMatrix &orig) 00319 : matrix(orig.matrix), num_rows(orig.num_rows), 00320 num_cols(orig.num_cols), do_free(orig.do_free) { } 00321 00323 virtual ~SGMatrix() 00324 { 00325 } 00326 00328 virtual void free_matrix() 00329 { 00330 if (do_free) 00331 SG_FREE(matrix); 00332 00333 matrix=NULL; 00334 do_free=false; 00335 num_rows=0; 00336 num_cols=0; 00337 } 00338 00340 virtual void destroy_matrix() 00341 { 00342 do_free=true; 00343 free_matrix(); 00344 } 00345 00349 inline const T& operator[](index_t index) const 00350 { 00351 return matrix[index]; 00352 } 00353 00357 inline T& operator[](index_t index) 00358 { 00359 return matrix[index]; 00360 } 00361 00362 public: 00364 T* matrix; 00366 index_t num_rows; 00368 index_t num_cols; 00370 bool do_free; 00371 }; 00372 00374 template<class T> class SGNDArray 00375 { 00376 public: 00378 SGNDArray() : array(NULL), dims(NULL), num_dims(0) { } 00379 00381 SGNDArray(T* a, index_t* d, index_t nd) 00382 : array(a), dims(d), num_dims(nd) { } 00383 00385 SGNDArray(const SGNDArray &orig) 00386 : array(orig.array), dims(orig.dims), num_dims(orig.num_dims) { } 00387 00388 public: 00390 T* array; 00392 index_t* dims; 00394 index_t num_dims; 00395 }; 00396 00398 template<class T> class SGString 00399 { 00400 public: 00402 SGString() : string(NULL), slen(0), do_free(false) { } 00403 00405 SGString(T* s, index_t l, bool free_s=false) 00406 : string(s), slen(l), do_free(free_s) { } 00407 00409 SGString(SGVector<T> v) 00410 : string(v.vector), slen(v.vlen), do_free(v.do_free) { } 00411 00413 SGString(index_t len, bool free_s=false) : 00414 slen(len), do_free(free_s) 00415 { 00416 string=SG_MALLOC(T, len); 00417 } 00418 00420 SGString(const SGString &orig) 00421 : string(orig.string), slen(orig.slen), do_free(orig.do_free) { } 00422 00424 void free_string() 00425 { 00426 if (do_free) 00427 SG_FREE(string); 00428 00429 string=NULL; 00430 do_free=false; 00431 slen=0; 00432 } 00433 00435 void destroy_string() 00436 { 00437 do_free=true; 00438 free_string(); 00439 } 00440 00441 public: 00443 T* string; 00445 index_t slen; 00447 bool do_free; 00448 }; 00449 00451 template <class T> struct SGStringList 00452 { 00453 public: 00455 SGStringList() : num_strings(0), max_string_length(0), strings(NULL), 00456 do_free(false) { } 00457 00459 SGStringList(SGString<T>* s, index_t num_s, index_t max_length, 00460 bool free_strings=false) : num_strings(num_s), 00461 max_string_length(max_length), strings(s), do_free(free_strings) { } 00462 00464 SGStringList(index_t num_s, index_t max_length, bool free_strings=false) 00465 : num_strings(num_s), max_string_length(max_length), 00466 do_free(free_strings) 00467 { 00468 strings=SG_MALLOC(SGString<T>, num_strings); 00469 } 00470 00472 SGStringList(const SGStringList &orig) : 00473 num_strings(orig.num_strings), 00474 max_string_length(orig.max_string_length), 00475 strings(orig.strings), do_free(orig.do_free) { } 00476 00478 void free_list() 00479 { 00480 if (do_free) 00481 SG_FREE(strings); 00482 00483 strings=NULL; 00484 do_free=false; 00485 num_strings=0; 00486 max_string_length=0; 00487 } 00488 00490 void destroy_list() 00491 { 00492 do_free=true; 00493 free_list(); 00494 } 00495 00496 public: 00498 index_t num_strings; 00499 00501 index_t max_string_length; 00502 00504 SGString<T>* strings; 00505 00507 bool do_free; 00508 }; 00509 00511 template <class T> struct SGSparseVectorEntry 00512 { 00514 index_t feat_index; 00516 T entry; 00517 }; 00518 00520 template <class T> class SGSparseVector 00521 { 00522 public: 00524 SGSparseVector() : 00525 vec_index(0), num_feat_entries(0), features(NULL), do_free(false) {} 00526 00528 SGSparseVector(SGSparseVectorEntry<T>* feats, index_t num_entries, 00529 index_t index, bool free_v=false) : 00530 vec_index(index), num_feat_entries(num_entries), features(feats), 00531 do_free(free_v) {} 00532 00534 SGSparseVector(index_t num_entries, index_t index, bool free_v=false) : 00535 vec_index(index), num_feat_entries(num_entries), do_free(free_v) 00536 { 00537 features=SG_MALLOC(SGSparseVectorEntry<T>, num_feat_entries); 00538 } 00539 00541 SGSparseVector(const SGSparseVector& orig) : 00542 vec_index(orig.vec_index), num_feat_entries(orig.num_feat_entries), 00543 features(orig.features), do_free(orig.do_free) {} 00544 00546 void free_vector() 00547 { 00548 if (do_free) 00549 SG_FREE(features); 00550 00551 features=NULL; 00552 do_free=false; 00553 vec_index=0; 00554 num_feat_entries=0; 00555 } 00556 00558 void destroy_vector() 00559 { 00560 do_free=true; 00561 free_vector(); 00562 } 00563 00564 public: 00566 index_t vec_index; 00567 00569 index_t num_feat_entries; 00570 00572 SGSparseVectorEntry<T>* features; 00573 00575 bool do_free; 00576 }; 00577 00579 template <class T> class SGSparseMatrix 00580 { 00581 public: 00583 SGSparseMatrix() : 00584 num_vectors(0), num_features(0), sparse_matrix(NULL), 00585 do_free(false) { } 00586 00587 00589 SGSparseMatrix(SGSparseVector<T>* vecs, index_t num_feat, 00590 index_t num_vec, bool free_m=false) : 00591 num_vectors(num_vec), num_features(num_feat), 00592 sparse_matrix(vecs), do_free(free_m) { } 00593 00595 SGSparseMatrix(index_t num_vec, index_t num_feat, bool free_m=false) : 00596 num_vectors(num_vec), num_features(num_feat), do_free(free_m) 00597 { 00598 sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors); 00599 } 00600 00602 SGSparseMatrix(const SGSparseMatrix &orig) : 00603 num_vectors(orig.num_vectors), num_features(orig.num_features), 00604 sparse_matrix(orig.sparse_matrix), do_free(orig.do_free) { } 00605 00607 void free_matrix() 00608 { 00609 if (do_free) 00610 SG_FREE(sparse_matrix); 00611 00612 sparse_matrix=NULL; 00613 do_free=false; 00614 num_vectors=0; 00615 num_features=0; 00616 } 00617 00619 void own_matrix() 00620 { 00621 for (index_t i=0; i<num_vectors; i++) 00622 sparse_matrix[i].do_free=false; 00623 00624 do_free=false; 00625 } 00626 00628 void destroy_matrix() 00629 { 00630 do_free=true; 00631 free_matrix(); 00632 } 00633 00634 public: 00636 index_t num_vectors; 00637 00639 index_t num_features; 00640 00642 SGSparseVector<T>* sparse_matrix; 00643 00645 bool do_free; 00646 }; 00647 00648 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00649 enum EContainerType 00650 { 00651 CT_SCALAR=0, 00652 CT_VECTOR=1, 00653 CT_MATRIX=2, 00654 CT_NDARRAY=3, 00655 CT_SGVECTOR=4, 00656 CT_SGMATRIX=5 00657 }; 00658 00659 enum EStructType 00660 { 00661 ST_NONE=0, 00662 ST_STRING=1, 00663 ST_SPARSE=2 00664 }; 00665 00666 enum EPrimitiveType 00667 { 00668 PT_BOOL=0, 00669 PT_CHAR=1, 00670 PT_INT8=2, 00671 PT_UINT8=3, 00672 PT_INT16=4, 00673 PT_UINT16=5, 00674 PT_INT32=6, 00675 PT_UINT32=7, 00676 PT_INT64=8, 00677 PT_UINT64=9, 00678 PT_FLOAT32=10, 00679 PT_FLOAT64=11, 00680 PT_FLOATMAX=12, 00681 PT_SGOBJECT=13 00682 }; 00683 #endif 00684 00686 struct TSGDataType 00687 { 00689 EContainerType m_ctype; 00691 EStructType m_stype; 00693 EPrimitiveType m_ptype; 00694 00696 index_t *m_length_y; 00698 index_t *m_length_x; 00699 00705 explicit TSGDataType(EContainerType ctype, EStructType stype, 00706 EPrimitiveType ptype); 00713 explicit TSGDataType(EContainerType ctype, EStructType stype, 00714 EPrimitiveType ptype, index_t* length); 00722 explicit TSGDataType(EContainerType ctype, EStructType stype, 00723 EPrimitiveType ptype, index_t* length_y, 00724 index_t* length_x); 00725 00727 bool operator==(const TSGDataType& a); 00731 inline bool operator!=(const TSGDataType& a) 00732 { 00733 return !(*this == a); 00734 } 00735 00740 void to_string(char* dest, size_t n) const; 00741 00743 size_t sizeof_stype() const; 00745 size_t sizeof_ptype() const; 00746 00750 static size_t sizeof_sparseentry(EPrimitiveType ptype); 00751 00755 static size_t offset_sparseentry(EPrimitiveType ptype); 00756 00763 static void stype_to_string(char* dest, EStructType stype, 00764 EPrimitiveType ptype, size_t n); 00770 static void ptype_to_string(char* dest, EPrimitiveType ptype, 00771 size_t n); 00776 static bool string_to_ptype(EPrimitiveType* ptype, 00777 const char* str); 00778 00782 size_t get_size(); 00783 00787 index_t get_num_elements(); 00788 }; 00789 } 00790 #endif /* __DATATYPE_H__ */