presage
0.9.1
src
lib
core
tokenizer
reverseTokenizer.cpp
Go to the documentation of this file.
1
2
/******************************************************
3
* Presage, an extensible predictive text entry system
4
* ---------------------------------------------------
5
*
6
* Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8
This program is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2 of the License, or
11
(at your option) any later version.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
17
18
You should have received a copy of the GNU General Public License along
19
with this program; if not, write to the Free Software Foundation, Inc.,
20
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
*
22
**********(*)*/
23
24
25
#include "
reverseTokenizer.h
"
26
27
/**
 * Constructs a tokenizer that starts reading at the end of the stream.
 *
 * All three arguments are forwarded unchanged to the Tokenizer base
 * class constructor.
 *
 * @param stream  input stream to be tokenized
 * @param blanks  forwarded to Tokenizer (presumably the set of
 *                blankspace characters - confirm in tokenizer.h)
 * @param separs  forwarded to Tokenizer (presumably the set of
 *                separator characters - confirm in tokenizer.h)
 */
ReverseTokenizer::ReverseTokenizer(std::istream& stream,
                                   const std::string blanks,
                                   const std::string separs)
    : Tokenizer(stream, blanks, separs)
{
    // The stream state flags are reset because the tokenizer is
    // positioned at the end of the stream.
    stream.clear();

    // Reverse tokenization starts from the end offset.
    offset = offend;
}
43
44
/**
 * Destructor. The body is intentionally empty: this class performs no
 * cleanup of its own here.
 */
ReverseTokenizer::~ReverseTokenizer()
{
}
46
47
int
ReverseTokenizer::countTokens
()
48
{
49
StreamGuard
guard(
stream
,
offset
);
50
51
// store current seek pointer position
52
std::streamoff curroff =
offset
;
53
54
// position get pointer at end of stream
55
offset
=
offend
;
56
57
int
count = 0;
58
while
(
hasMoreTokens
()) {
59
nextToken
();
60
count++;
61
}
62
63
// reposition seek get pointer to original position
64
offset
= curroff;
65
66
return
count;
67
}
68
69
bool
ReverseTokenizer::hasMoreTokens
()
const
70
{
71
//std::cerr << "ReverseTokenizer::hasMoreTokens() offbeg: " << offbeg
72
// << " offset: " << offset << " offend: " << offend << std::endl;
73
if
(
offbeg
<
offset
) {
74
return
true
;
75
}
else
{
76
return
false
;
77
}
78
}
79
80
std::string
ReverseTokenizer::nextToken
()
81
{
82
StreamGuard
guard(
stream
,
offset
);
83
84
int
current;
85
std::string str;
86
87
if
(
stream
.good()) {
88
while
(
offbeg
<
offset
89
&& str.empty()) {
90
stream
.seekg(
offset
- 1);
91
current =
stream
.peek();
92
93
if
(
offset
==
offend
&&
94
(
isSeparator
(current) ||
isBlankspace
(current))) {
95
offset
--;
96
return
str;
97
}
98
99
while
((
isBlankspace
(current) ||
isSeparator
(current))
100
&&
offbeg
<
offset
) {
101
offset
--;
102
stream
.seekg(
offset
- 1);
103
current =
stream
.peek();
104
}
105
106
while
(!
isBlankspace
(current)
107
&& !
isSeparator
(current)
108
&&
offbeg
<
offset
) {
109
110
if
(
lowercaseMode
() ) {
111
current = tolower( current );
112
}
113
114
// since the token is read backwards, the string
115
// needs to be reversed by inserting the char at
116
// the front
117
str.insert(str.begin(), current);
118
119
offset
--;
120
stream
.seekg(
offset
- 1);
121
current =
stream
.peek();
122
}
123
}
124
}
125
126
// if (stream.good()) {
127
// do {
128
// do {
129
// current = stream.peek();
130
// offset--;
131
// stream.seekg(offset);
132
//
133
// // handle case where last character is a separator by
134
// // returning an empty token
135
// if (offset == offend - 2
136
// && isSeparator(current)) {
137
// return "";
138
// }
139
//
140
// //std::cerr << "[DEBUG] read: "
141
// // << static_cast<char>(current)
142
// // << std::endl;
143
//
144
// if (!isBlankspace(current)
145
// && !isSeparator(current)
146
// && offset >= offbeg - 1) {
147
//
148
// if( lowercaseMode() ) {
149
// current = tolower( current );
150
// }
151
//
152
// // since the token is read backwards, the string
153
// // needs to be reversed by inserting the char at
154
// // the front
155
// str.insert(str.begin(), current);
156
//
157
// //std::cerr << "[DEBUG] pushed: "
158
// // << static_cast<char>(current)
159
// // << std::endl;
160
// //std::cerr << "[DEBUG] partial string: "
161
// // << str << std::endl;
162
// }
163
// } while (!isBlankspace(current)
164
// && !isSeparator(current)
165
// && (offset >= offbeg));
166
// } while (str.empty() && (offset >= offbeg));
167
// }
168
169
//std::cerr << "[DEBUG] token: " << str << std::endl;
170
171
return
str;
172
}
173
174
double
ReverseTokenizer::progress
()
const
175
{
176
return
static_cast<double>(
offend
-
offset
) / (
offend
-
offbeg
);
177
}
ReverseTokenizer::hasMoreTokens
virtual bool hasMoreTokens() const
Definition:
reverseTokenizer.cpp:68
Tokenizer::isBlankspace
bool isBlankspace(const int character) const
Definition:
tokenizer.cpp:90
ReverseTokenizer::~ReverseTokenizer
virtual ~ReverseTokenizer()
Definition:
reverseTokenizer.cpp:43
Tokenizer
Definition:
tokenizer.h:63
ReverseTokenizer::nextToken
virtual std::string nextToken()
Definition:
reverseTokenizer.cpp:79
Tokenizer::lowercaseMode
bool lowercaseMode() const
Definition:
tokenizer.cpp:85
Tokenizer::offbeg
std::streamoff offbeg
Definition:
tokenizer.h:164
ReverseTokenizer::ReverseTokenizer
ReverseTokenizer(std::istream &stream, const std::string blanks, const std::string separs)
Definition:
reverseTokenizer.cpp:26
ReverseTokenizer::countTokens
virtual int countTokens()
Definition:
reverseTokenizer.cpp:46
Tokenizer::isSeparator
bool isSeparator(const int character) const
Definition:
tokenizer.cpp:100
Tokenizer::offset
std::streamoff offset
Definition:
tokenizer.h:166
ReverseTokenizer::progress
virtual double progress() const
Definition:
reverseTokenizer.cpp:173
Tokenizer::StreamGuard
Definition:
tokenizer.h:143
Tokenizer::stream
std::istream & stream
Definition:
tokenizer.h:162
Tokenizer::offend
std::streamoff offend
Definition:
tokenizer.h:165
reverseTokenizer.h
Generated on Sat Jan 11 2020 18:56:52 for presage by
1.8.16