SHOGUN  v3.2.0
VwParser.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
16 #ifndef _VW_PARSER_H__
17 #define _VW_PARSER_H__
18 
19 #include <shogun/base/SGObject.h>
20 #include <shogun/io/SGIO.h>
21 #include <shogun/lib/Hash.h>
24 
25 namespace shogun
26 {
/** The type of input to parse. */
enum E_VW_PARSER_TYPE
{
    T_VW = 1,
    T_SVMLIGHT = 2,
    T_DENSE = 3
};
34 
46 class CVwParser: public CSGObject
47 {
48 public:
52  CVwParser();
53 
59  CVwParser(CVwEnvironment* env_to_use);
60 
64  virtual ~CVwParser();
65 
72  {
73  SG_REF(env);
74  return env;
75  }
76 
82  void set_env(CVwEnvironment* env_to_use)
83  {
84  env = env_to_use;
85  SG_REF(env);
86  }
87 
94  void set_cache_parameters(char * fname, EVwCacheType type = C_NATIVE)
95  {
96  init_cache(fname, type);
97  }
98 
105  {
106  return cache_type;
107  }
108 
114  void set_write_cache(bool wr_cache)
115  {
116  write_cache = wr_cache;
117  if (wr_cache)
118  init_cache(NULL);
119  else
120  if (cache_writer)
122  }
123 
130  {
131  return write_cache;
132  }
133 
139  void set_mm(float64_t label)
140  {
141  env->min_label = CMath::min(env->min_label, label);
142  if (label != FLT_MAX)
143  env->max_label = CMath::max(env->max_label, label);
144  }
145 
152  void noop_mm(float64_t label) { }
153 
160  void set_minmax(float64_t label)
161  {
162  set_mm(label);
163  }
164 
173  int32_t read_features(CIOBuffer* buf, VwExample*& ex);
174 
183  int32_t read_svmlight_features(CIOBuffer* buf, VwExample*& ae);
184 
193  int32_t read_dense_features(CIOBuffer* buf, VwExample*& ae);
194 
200  virtual const char* get_name() const { return "VwParser"; }
201 
202 protected:
209  void init_cache(char * fname, EVwCacheType type = C_NATIVE);
210 
220 
229  void tokenize(char delim, substring s, v_array<substring> &ret);
230 
241  inline char* safe_index(char *start, char v, char *max)
242  {
243  while (start != max && *start != v)
244  start++;
245  return start;
246  }
247 
248 public:
251 
252 protected:
261 
262 private:
264  v_array<substring> channels;
265  v_array<substring> words;
266  v_array<substring> name;
267 };
268 
269 }
270 #endif // _VW_PARSER_H__
An I/O buffer class.
Definition: IOBuffer.h:44
void feature_value(substring &s, v_array< substring > &name, float32_t &v)
Definition: VwParser.cpp:271
CVwCacheWriter is the base class for all VW cache creating classes.
Definition: VwCacheWriter.h:35
uint32_t(* hash_func_t)(substring, uint32_t)
Hash function typedef, takes a substring and seed as parameters.
Definition: vw_constants.h:21
char * safe_index(char *start, char v, char *max)
Definition: VwParser.h:241
virtual ~CVwParser()
Definition: VwParser.cpp:42
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:39
int32_t read_features(CIOBuffer *buf, VwExample *&ex)
Definition: VwParser.cpp:48
CVwEnvironment * env
Environment of VW - used by parser.
Definition: VwParser.h:254
#define SG_UNREF(x)
Definition: SGRefObject.h:35
CVwParser is the object which provides the functions to parse examples from buffered input...
Definition: VwParser.h:46
float64_t min_label
Smallest label seen.
Class v_array taken directly from JL's implementation.
EVwCacheType get_cache_type()
Definition: VwParser.h:104
void set_minmax(float64_t label)
Definition: VwParser.h:160
CVwCacheWriter * cache_writer
Object which will be used for writing cache.
Definition: VwParser.h:256
E_VW_PARSER_TYPE
The type of input to parse.
Definition: VwParser.h:28
int32_t read_dense_features(CIOBuffer *buf, VwExample *&ae)
Definition: VwParser.cpp:206
void set_write_cache(bool wr_cache)
Definition: VwParser.h:114
CVwEnvironment * get_env()
Definition: VwParser.h:71
struct Substring, specified by start position and end position.
Definition: SGIO.h:231
float64_t max_label
Largest label seen.
void tokenize(char delim, substring s, v_array< substring > &ret)
Definition: VwParser.cpp:295
bool write_cache
Whether to write cache or not.
Definition: VwParser.h:260
bool get_write_cache()
Definition: VwParser.h:129
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:102
int32_t read_svmlight_features(CIOBuffer *buf, VwExample *&ae)
Definition: VwParser.cpp:164
double float64_t
Definition: common.h:48
#define SG_REF(x)
Definition: SGRefObject.h:34
Example class for VW.
Definition: vw_example.h:56
static T max(T a, T b)
Return the maximum of two values of type T.
Definition: Math.h:160
virtual const char * get_name() const
Definition: VwParser.h:200
EVwCacheType cache_type
Type of cache.
Definition: VwParser.h:258
float float32_t
Definition: common.h:47
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:16
void noop_mm(float64_t label)
Definition: VwParser.h:152
void set_mm(float64_t label)
Definition: VwParser.h:139
void set_env(CVwEnvironment *env_to_use)
Definition: VwParser.h:82
static T min(T a, T b)
Return the minimum of two values of type T.
Definition: Math.h:153
void init_cache(char *fname, EVwCacheType type=C_NATIVE)
Definition: VwParser.cpp:248
hash_func_t hasher
Hash function to use, of type hash_func_t.
Definition: VwParser.h:250
void set_cache_parameters(char *fname, EVwCacheType type=C_NATIVE)
Definition: VwParser.h:94

SHOGUN Machine Learning Toolbox - Documentation