-
Notifications
You must be signed in to change notification settings - Fork 2
/
textfile.h
395 lines (295 loc) · 9.91 KB
/
textfile.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
// textfile.h: interface for the textfile class.
//
//////////////////////////////////////////////////////////////////////
#ifndef PEKSPRODUCTIONS_TEXTFILE
#define PEKSPRODUCTIONS_TEXTFILE
/*
CTextFileDocument lets you write and read text files with
different encodings (ASCII, UTF-8, Unicode 16 little/big
endian are supported). When you work with ASCII files,
CTextFileDocument will help you convert strings to/from
different code pages.
Let me know if you find something strange or just get
a clever idea...
Get the latest version at
http://www.codeproject.com/file/textfiledocument.asp
Version 1.22 - 21 May 2005
! Reading a line before reading everything could add an
extra line break.
! A member variable wasn't always initialized, could cause
problems when reading single lines.
! A smarter/easier algorithm is used when reading single lines.
Version 1.21 - 10 Apr 2005
! Fix by sammyc: If it was not possible to open a file in techlevel 1,
IsOpen returned a bad result.
Version 1.20 - 15 Jan 2005
! Fixed some problems when converting multi-byte string to Unicode,
and vice versa.
+ Improved conversion routines. It's now possible to define
which code-page to use.
+ It's now possible to set which character to use when it's
not possible to convert an Unicode character to an multi-byte character.
+ It's now possible to see if data was lost during conversion.
+ Better support for other platforms, it's no longer necessary to use
MFC in Windows.
! Reading very small files (1 byte) failed.
Version 1.13 - 26 Dec 2004
! Fixes by drinktea:
! If a text file began with an empty line, the file
wasn't read correctly (first empty line was ignored).
! Fixes in CharToWstring and WcharToString.
Version 1.12 - 17 Oct 2004
+ Minor memory leak when open file failed, fixed.
Version 1.11 - 28 Aug 2004
! Calling WriteEndl() when writing an ASCII file could make
the file incorrectly written. Fixed.
+ ASCII files are written faster.
Version 1.10 - 13 Aug 2004
Sorry about the quick update.
+ Improved performance (much faster now, but code is more complicated :-/).
+ Buffer is used when writing files.
+ Buffer is used in non-mfc compilers
Version 1.0 - 12 Aug 2004
Initial version.
PEK
*/
/*
If you are creating a console project that doesn't support
MFC in Visual Studio, you will probably need to define
techlevel to 0:
#define PEK_TX_TECHLEVEL 0
In other cases it is usually not necessary to define which "tech-level"
to use; the code below should do this for you. However,
if you need to, this is the difference:
#define PEK_TX_TECHLEVEL 0
You should use this if you are running on a non-Windows
platform. This uses fstream internally to read and
write files. If you want to change codepage you should
call setlocale.
#define PEK_TX_TECHLEVEL 1
Use this on Windows if you don't use MFC. This calls
the Windows API directly to read and write files. If
something couldn't be read/written, a CTextFileException
is thrown. Unicode in filenames is supported.
Codepages are supported.
#define PEK_TX_TECHLEVEL 2
Use this when you are using MFC. This uses CFile
internally to read and write files. If data can't be
read/written, CFile will throw an exception. Codepages
are supported. Unicode in filenames is supported.
CString is supported.
*/
//Choose a "tech level" automatically unless the includer already set one:
//  2 when _MFC_VER is defined,
//  1 when _MFC_VER is not defined but _WIN32 is,
//  0 in every other case.
#if !defined(PEK_TX_TECHLEVEL)
#  if defined(_MFC_VER)
#    define PEK_TX_TECHLEVEL 2
#  elif defined(_WIN32)
#    define PEK_TX_TECHLEVEL 1
#  else
#    define PEK_TX_TECHLEVEL 0
#  endif
#endif
//<fstream> is pulled in only when the tech level is not above 0.
#if !(PEK_TX_TECHLEVEL > 0)
#include <fstream>
#endif
/*
FILENAMECHAR is the character type used for file names.
In windows it's possible to use Unicode in filenames,
in unix it's not possible (afaik). So the wide type is chosen
only for tech levels above 0 in a _UNICODE build; every other
configuration uses plain char.
*/
//#include <afx.h>
#if (PEK_TX_TECHLEVEL > 0) && defined(_UNICODE)
typedef wchar_t FILENAMECHAR;
#else
typedef char FILENAMECHAR;
#endif
#include <string>
using namespace std;
//Shared base of CTextFileWrite and CTextFileRead. It holds the file
//encoding, the read/write buffer state and the settings used when
//converting between multi-byte and Unicode strings.
//Standard library types are written with an explicit std:: so that this
//class does not rely on a using-directive being in effect.
class CTextFileBase
{
public:
    enum TEXTENCODING { ASCII, UNI16_BE, UNI16_LE, UTF_8 };

    CTextFileBase();

    //Virtual: this class has a virtual member (Close) and is derived
    //from, so destroying a derived object through a CTextFileBase
    //pointer must run the derived destructor as well.
    virtual ~CTextFileBase();

    //Is the file open?
    int IsOpen();

    //Close the file
    virtual void Close();

    //Return the encoding of the file (ASCII, UNI16_BE, UNI16_LE or UTF_8);
    TEXTENCODING GetEncoding() const;

    //Set which character that should be used when converting
    //Unicode->multi byte and an unknown character is found ('?' is default)
    void SetUnknownChar(const char unknown);

    //Returns true if data was lost
    //(happens when converting Unicode->multi byte string and an
    //unmappable character is found).
    bool IsDataLost() const;

    //Reset the data lost flag
    void ResetDataLostFlag();

#if PEK_TX_TECHLEVEL > 0
    /* Note!
       The codepage is only used when converting from multibyte
       to Unicode or vice versa. It is not used when reading
       ANSI files into non-Unicode strings, or reading
       Unicode files into Unicode strings.

       This means that if you want to read an ANSI text file
       (with some code page) into a non-Unicode string you
       must do the conversion yourself. But this is easy :-).
       Read the file with the codepage to a wstring, then use
       ConvertWcharToString to convert the wstring to a
       string.
    */

    //Set codepage to use when working with non-Unicode strings
    void SetCodePage(const UINT codepage);

    //Get codepage to use when working with non-Unicode strings
    UINT GetCodePage() const;

    //Convert char* to wstring
    static void ConvertCharToWstring(const char* from, std::wstring &to, UINT codepage=CP_ACP);

    //Convert wchar_t* to string.
    //datalost (optional) and unknownchar mirror the data-lost flag and
    //unknown-character setting described above.
    static void ConvertWcharToString(const wchar_t* from, std::string &to, UINT codepage=CP_ACP, bool* datalost=NULL, char unknownchar=0);
#else
    //Convert char* to wstring
    static void ConvertCharToWstring(const char* from, std::wstring &to);

    //Convert wchar_t* to string.
    //NOTE(review): the default unknownchar here is 'a', while the
    //codepage-aware overload defaults to 0 and SetUnknownChar documents
    //'?' as the default - confirm this is intended.
    static void ConvertWcharToString(const wchar_t* from, std::string &to, bool* datalost=NULL, char unknownchar='a');
#endif

protected:
    //Convert char* to wstring
    void CharToWstring(const char* from, std::wstring &to) const;

    //Convert wchar_t* to string
    void WcharToString(const wchar_t* from, std::string &to);

    //The encoding of the file
    TEXTENCODING m_encoding;

    //Buffer size (dimension of m_buf below). Kept as a macro, exactly
    //as before, since code outside this class may refer to BUFFSIZE.
#define BUFFSIZE 1024

#if PEK_TX_TECHLEVEL == 0
    //Use fstream
    std::fstream m_file;
#elif PEK_TX_TECHLEVEL == 1
public:
    //Windows file handle (publicly accessible in this tech level)
    HANDLE m_hFile;
protected:
#else
    //In windows we are using CFile
    CFile* m_file;
    bool m_closeAndDeleteFile;
#endif

    //These control the buffer for reading/writing

    //True if end of file
    bool m_endoffile;

    //Reading buffer
    char m_buf[BUFFSIZE];

    //Buffer position
    int m_buffpos;

    //Size of buffer
    int m_buffsize;

    //Character used when converting Unicode->multi byte and an unknown character was found
    char m_unknownChar;

    //Is true if data was lost when converting Unicode->multi-byte
    bool m_datalost;

#if PEK_TX_TECHLEVEL > 0
    //Codepage used when working with non-Unicode strings
    UINT m_codepage;
#endif
};
//Text file writer built on CTextFileBase. The encoding to write is
//selected at construction time (ASCII unless another value is passed).
class CTextFileWrite : public CTextFileBase
{
public:
    //Create a writer for the file named by filename
    CTextFileWrite(const FILENAMECHAR* filename, TEXTENCODING type = ASCII);
#if PEK_TX_TECHLEVEL == 2
    //Create a writer on top of a caller-supplied CFile (MFC only)
    CTextFileWrite(CFile* file, TEXTENCODING type = ASCII);
#endif
    ~CTextFileWrite();

    //---- Write routines: narrow and wide, C strings and std strings ----
    void Write(const char* text);
    void Write(const wchar_t* text);
    void Write(const std::string& text);
    void Write(const std::wstring& text);

    //---- Stream-style equivalents; each returns the writer itself ----
    CTextFileWrite& operator << (const char c);
    CTextFileWrite& operator << (const char* text);
    CTextFileWrite& operator << (const std::string& text);
    CTextFileWrite& operator << (const wchar_t wc);
    CTextFileWrite& operator << (const wchar_t* text);
    CTextFileWrite& operator << (const std::wstring& text);

    //Write new line (two characters, 13 and 10)
    void WriteEndl();

    //Close the file
    virtual void Close();

private:
    //Write out the buffer and empty it
    void Flush();

    //Write a single wchar_t, converting it first
    void WriteWchar(const wchar_t ch);

    //Write one byte
    void WriteByte(const unsigned char byte);

    //Write a c-string in ASCII-format
    void WriteAsciiString(const char* s);

    //Write byte order mark
    void WriteBOM();
};
//Text file reader built on CTextFileBase. Lines or the whole remaining
//content can be read into narrow or wide strings.
class CTextFileRead : public CTextFileBase
{
public:
    //Create a reader for the file named by filename
    CTextFileRead(const FILENAMECHAR* filename);
#if PEK_TX_TECHLEVEL == 2
    //Create a reader on top of a caller-supplied CFile (MFC only)
    CTextFileRead(CFile* file);
#endif

    //Read one line.
    //Returns false if end-of-file was reached (line is then left
    //unchanged). A true result means the last line ended with a
    //line break.
    bool ReadLine(std::string& line);
    bool ReadLine(std::wstring& line);

    //Read everything from the current position into all.
    //newline defaults to "\r\n".
    bool Read(std::string& all, const std::string newline = "\r\n");
    bool Read(std::wstring& all, const std::wstring newline = L"\r\n");

#if PEK_TX_TECHLEVEL == 2
    //CString flavours of ReadLine/Read (MFC only)
    bool ReadLine(CString& line);
    bool Read(CString& all, const CString newline = _T("\r\n"));
#endif

    //End of file?
    bool Eof() const;

private:
    //Guess the number of characters in the file
    int GuessCharacterCount();

    //Read line to wstring
    bool ReadWcharLine(std::wstring& line);

    //Read line to string
    bool ReadCharLine(std::string& line);

    //Reset the file pointer to start
    void ResetFilePointer();

    //Read one wchar_t
    void ReadWchar(wchar_t& ch);

    //Read one byte
    void ReadByte(unsigned char& ch);

    //Detect encoding
    void ReadBOM();

    //Use extra buffer. Sometimes we read one character too much, save it.
    bool m_useExtraBuffer;

    //Used to see if the first line in the file is the one to read
    //(so we know how to handle \n\r)
    bool m_firstLine;

    //Extra buffer. It's ok to share the memory between the narrow
    //and the wide variant.
    union
    {
        char m_extraBuffer_char;
        wchar_t m_extraBuffer_wchar;
    };
};
#if PEK_TX_TECHLEVEL == 1
//Only used in Windows mode without MFC.
//An exception of this type is thrown when data couldn't be read or written.
class CTextFileException
{
public:
    //Store the given error code in the exception
    CTextFileException(DWORD err)
        : m_errorCode(err)
    {
    }

    //Value returned by GetLastError()
    DWORD m_errorCode;
};
#endif
#endif //PEKSPRODUCTIONS_TEXTFILE