-
Notifications
You must be signed in to change notification settings - Fork 0
/
GNUMOFileReader.pas
440 lines (363 loc) · 12.1 KB
/
GNUMOFileReader.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
unit GNUMOFileReader;
interface
uses System.SysUtils, System.Classes, System.Generics.Collections, System.Generics.Defaults;
type
{ Method pointer with the shape of C's
  char *gettext (const char *msgid).
  TGNUMOFileReader.GetText is assignable to it. }
TGetTextProc = function(const MsgId: string): string of object;
{ Method pointer with the shape of C's
  char *ngettext (const char *msgid1, const char *msgid2, unsigned long int n).
  TGNUMOFileReader.NGetText is assignable to it. }
TNGetTextProc = function(const MsgId1, MsgId2: string; N: Integer): string of object;
/// <summary>
/// Callback that maps a count N to the index of the plural form to use.
/// PluralEN and PluralRU in this unit are examples of such handlers.
///
/// https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
/// </summary>
TPluralHandler = reference to function(N: Integer): Integer;
{ Base class for the errors raised by this unit
  (raised directly when the file revision is not supported). }
EGNUGettext = class(Exception);
{ Raised when the file does not carry a GNU .mo magic number. }
EGGIOError = class(EGNUGettext);
/// <summary>
/// Reader for GNU gettext binary catalogs (.mo files).
/// Create the reader with a file name, call Parse to load the catalog,
/// then look translations up with GetText / NGetText.
/// </summary>
TGNUMOFileReader = class sealed
private type
{ Metadata headers dictionary (header name -> header value) }
TMetadataDict = TDictionary<string, string>;
{ Catalog key: the msgid paired with a plural form index }
TPluralPair = TPair<string, Integer>;
/// <summary>
/// Catalog with msgid and its translation. The key is TPluralPair to support plural forms.
/// A non-plural msgid is stored as TPluralPair<msgid, -1>.
/// Plural forms are stored as TPluralPair<msgid, plural form index>.
/// </summary>
TStringsPair = TDictionary<TPluralPair, string>;
{ Header for binary .mo file format }
TMOFileHeader = record
{ The magic number }
Magic: Cardinal;
{ The revision number of the file format }
Revision: Cardinal;
{ The number of strings pairs }
NStrings: Cardinal;
{ Offset of table with start offsets of original strings }
OrigTabOffset: Cardinal;
{ Offset of table with start offsets of translated strings }
TransTabOffset: Cardinal;
{ Size of hash table (field disabled; it is not read by this unit) }
// HashTabSize: Cardinal;
{ Offset of first hash table entry (field disabled; it is not read by this unit) }
// HashTabOffset: Cardinal;
end;
// Equality comparer that lets TDictionary locate a TPluralPair key
TPluralPairComparer = class(TEqualityComparer<TPluralPair>)
public
function Equals(const Left: TPluralPair; const Right: TPluralPair): Boolean; override;
function GetHashCode(const Value: TPluralPair): Integer; override;
end;
private
var
// header fields filled in by ReadHeader
FHeader: TMOFileHeader;
// path of the .mo file passed to the constructor
FFileName: string;
// raw file contents; loaded by ReadBinaryFile and released at the end of Parse
FBuffer: TArray<Byte>;
// size of the file in bytes, set by ReadBinaryFile
FBufLen: NativeInt;
// NOTE(review): not referenced by any code in this unit; presumably reserved
// for byte-swapping big-endian files - confirm before relying on it
FNeedSwap: Boolean;
// metadata headers parsed by ReadMetadata
FInfo: TMetadataDict;
// msgid -> translation catalog filled by ReadStrings
FCatalog: TStringsPair;
// encoding used for reading metadata before charset is retrieved from headers
FDefaultEnc: TEncoding;
// encoding determined from the Content-Type field of the metadata
FStringsEncoding: TEncoding;
// True once ReadMetadata has created FStringsEncoding; the destructor frees it only then
FEncodingIsSet: Boolean;
// method pointers to GetText / NGetText, assigned in the constructor
FGetText: TGetTextProc;
FNGetText: TNGetTextProc;
// comparer instance handed to FCatalog
FComparer: IEqualityComparer<TPluralPair>;
// plural form selector; the constructor installs the English rule
FPluralHandler: TPluralHandler;
// raises EGGIOError when the magic number is neither LE_MAGIC nor BE_MAGIC
procedure EnsureSignature;
// raises EGNUGettext when the major revision is not supported
procedure EnsureRevision;
// loads the whole file into FBuffer
procedure ReadBinaryFile;
// copies the header fields out of FBuffer into FHeader
procedure ReadHeader;
// walks the string tables and fills FCatalog
procedure ReadStrings;
// parses the metadata entry (translation of the empty msgid) into FInfo
procedure ReadMetadata(const ABuf: TBytes);
public
/// <summary>
/// Creates the reader for the given file. The file is not opened until Parse is called.
/// </summary>
constructor Create(const AMOFileName: string);
destructor Destroy; override;
/// <summary>
/// Reads the file, validates its header and loads all strings into the catalog.
/// </summary>
procedure Parse;
/// <summary>
/// Keys of the catalog (msgid / plural index pairs).
/// </summary>
function GetKeys: TStringsPair.TKeyCollection;
/// <summary>
/// Translated strings of the catalog.
/// </summary>
function GetValues: TStringsPair.TValueCollection;
/// <summary>
/// This encoding is used for parsing metadata.
/// The strings encoding will be determined later based on metadata Content-Type field.
/// </summary>
property DefaultEncoding: TEncoding read FDefaultEnc write FDefaultEnc;
/// <summary>
/// Metadata headers read from the catalog.
/// </summary>
property Info: TMetadataDict read FInfo;
/// <summary>
/// Returns the translation of MsgId, or MsgId itself when the catalog has no entry for it.
/// </summary>
function GetText(const MsgId: string): string;
/// <summary>
/// Returns the translation for the plural form selected by PluralHandler(N).
/// When the catalog has no such entry, returns MsgId1 if N = 1, otherwise MsgId2.
/// </summary>
function NGetText(const MsgId1, MsgId2: string; N: Integer): string;
property Items[const MsgId: string]: string read GetText; default;
property Plurals[const MsgId1, MsgId2: string; N: Integer]: string read NGetText;
/// <summary>
/// Pointer to GetText method usually used for translations.
/// e.g. var _: TGetTextProc; _('Text to translate');
/// </summary>
property Get_gettext: TGetTextProc read FGetText;
/// <summary>
/// Pointer to the NGetText method, the plural counterpart of Get_gettext.
/// </summary>
property Get_ngettext: TNGetTextProc read FNGetText;
/// <summary>
/// Handler used to evaluate the plural form index for a given count.
/// </summary>
property PluralHandler: TPluralHandler read FPluralHandler write FPluralHandler;
end;
/// <summary>
/// Plural handler for English: returns 0 when Abs(N) = 1, otherwise 1.
/// </summary>
function PluralEN(N: Integer): Integer;
/// <summary>
/// Plural handler for Russian: returns the plural form index 0, 1 or 2.
/// </summary>
function PluralRU(N: Integer): Integer;
implementation
type
// Unit-private constants, grouped into nested records that act as namespaces.
Consts = record
type
// Byte offsets of the 32-bit header fields from the start of the file,
// as used by TGNUMOFileReader.ReadHeader.
Offset = record
public const
// magic number
SIGNATURE = 0;
// file format revision
VERSION = 4;
// number of strings
MSG_COUNT = 8;
// offset of the table describing the original strings
TEXTS = 12;
// offset of the table describing the translated strings
TRANSLATION = 16;
end;
// Format strings for the exceptions raised by this unit.
Str = record
public const
// %d receives the major revision number
S_VER_NOT_SUPPORTED = 'The version %d is not supported.';
// %s receives the file name
S_NOT_VALID_FILE = 'File "%s" is not in GNU .mo format.';
end;
public const
// Set of accepted major revisions (upper 16 bits of the revision field);
// despite the name this is a set, not a minimum.
MinVersion = [0, 1];
LE_MAGIC = $950412DE; // little-endian magic number
BE_MAGIC = $DE120495; // big-endian magic number
end;
/// <summary>
/// English plural rule.
/// Plural-Forms: nplurals=2; plural=n != 1;
///
/// Returns 0 (singular) when the absolute value of N is 1 and 1 (plural) otherwise.
/// See: https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
/// </summary>
function PluralEN(N: Integer): Integer;
begin
  if Abs(N) = 1 then
    Result := 0
  else
    Result := 1;
end;
/// <summary>
/// Russian plural rule.
/// Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
///
/// The sign of N is ignored. Returns the plural form index 0, 1 or 2.
/// See: https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
/// </summary>
function PluralRU(N: Integer): Integer;
var
  magnitude, lastDigit, lastTwoDigits: Integer;
begin
  magnitude := Abs(N);
  lastDigit := magnitude mod 10;
  lastTwoDigits := magnitude mod 100;

  // form 0: ends in 1 but not in 11
  if (lastDigit = 1) and (lastTwoDigits <> 11) then
    Exit(0);

  // form 1: ends in 2..4 but not in 12..14
  if (lastDigit >= 2) and (lastDigit <= 4) and
    not ((lastTwoDigits >= 10) and (lastTwoDigits < 20)) then
    Exit(1);

  // form 2: everything else
  Result := 2;
end;
/// <summary>
/// Reads the 32-bit value stored at byte position Offset of ABuffer,
/// in the byte order of the host.
/// </summary>
/// <param name="ABuffer">Source bytes.</param>
/// <param name="Offset">Zero-based byte position of the value.</param>
/// <returns>The four bytes at Offset interpreted as a Cardinal.</returns>
/// <exception cref="EGGIOError">
/// Raised when fewer than four bytes are available at Offset
/// (truncated or corrupt file).
/// </exception>
function GetDW(const ABuffer: TBytes; Offset: Cardinal): Cardinal; inline;
begin
  // Widen to 64 bits so that Offset + 4 cannot wrap around.
  if UInt64(Offset) + SizeOf(Result) > UInt64(Length(ABuffer)) then
    raise EGGIOError.CreateFmt('Unexpected end of data at offset %d.', [Offset]);
  // Move copies byte-wise, so the read is safe even when Offset is not 4-byte aligned.
  Move(ABuffer[Offset], Result, SizeOf(Result));
end;
/// <summary>
/// Returns a copy of ALength bytes of ABuffer starting at byte position AOffset.
/// </summary>
/// <param name="ABuffer">Source bytes.</param>
/// <param name="AOffset">Zero-based position of the first byte to copy.</param>
/// <param name="ALength">Number of bytes to copy.</param>
/// <returns>
/// A new array with the requested bytes. The result is empty when ALength is
/// less than 1, and is shortened when the requested range runs past the end
/// of ABuffer.
/// </returns>
function GetBuff(const ABuffer: TBytes; AOffset: Cardinal; ALength: Integer): TBytes;
begin
  // A dynamic-array function result is not cleared on entry, so it has to be
  // assigned on every path; otherwise the caller may see a previous value.
  Result := nil;
  if ALength < 1 then
    Exit;
  // Copy allocates the new array and clamps the range to the source length,
  // so nothing outside ABuffer is ever read.
  Result := Copy(ABuffer, AOffset, ALength);
end;
{ TGNUMOFileReader }

/// <summary>
/// Creates a reader for the given .mo file. Only the containers and defaults
/// are set up here; the file itself is read by Parse.
/// </summary>
/// <param name="AMOFileName">Path of the .mo file to read.</param>
constructor TGNUMOFileReader.Create(const AMOFileName: string);
var
  headerKeyComparer: IEqualityComparer<string>;
begin
  FFileName := AMOFileName;

  // method pointers exposed through Get_gettext / Get_ngettext
  FGetText := GetText;
  FNGetText := NGetText;

  // both encodings start out as the system default
  FDefaultEnc := TEncoding.Default;
  FStringsEncoding := TEncoding.Default;

  // metadata header names are looked up without regard to case
  headerKeyComparer := TDelegatedEqualityComparer<string>.Create(
    function(const Left, Right: string): Boolean
    begin
      Result := CompareText(Left, Right) = 0;
    end,
    function(const Value: string): Integer
    begin
      Result := Value.ToLower.GetHashCode;
    end);
  FInfo := TMetadataDict.Create(headerKeyComparer);

  // the catalog is keyed by (msgid, plural index) pairs
  FComparer := TPluralPairComparer.Create;
  FCatalog := TStringsPair.Create(FComparer);

  // default plural rule is the English one (same rule as PluralEN)
  FPluralHandler := function(N: Integer): Integer
    begin
      if Abs(N) = 1 then
        Result := 0
      else
        Result := 1;
    end;
end;
/// <summary>
/// Releases the catalog, the metadata dictionary and, when it was created by
/// ReadMetadata, the strings encoding.
/// </summary>
destructor TGNUMOFileReader.Destroy;
begin
  // The strings encoding is owned by the reader only after ReadMetadata
  // created it; until then it is the shared TEncoding.Default.
  if FEncodingIsSet then
    FStringsEncoding.Free;
  // The default encoding is only referenced here, never freed.
  FDefaultEnc := nil;
  FCatalog.Free;
  FInfo.Free;
  inherited;
end;
/// <summary>
/// Verifies that the major revision (upper 16 bits of the header revision
/// field) is one of the values in Consts.MinVersion.
/// </summary>
/// <exception cref="EGNUGettext">Raised when the major revision is not supported.</exception>
procedure TGNUMOFileReader.EnsureRevision;
var
  majorRevision: Cardinal;
begin
  majorRevision := FHeader.Revision shr 16;
  if majorRevision in Consts.MinVersion then
    Exit;
  raise EGNUGettext.CreateFmt(Consts.Str.S_VER_NOT_SUPPORTED, [majorRevision]);
end;
/// <summary>
/// Verifies that the header starts with one of the two GNU .mo magic numbers.
/// </summary>
/// <exception cref="EGGIOError">Raised when the magic number is not recognised.</exception>
procedure TGNUMOFileReader.EnsureSignature;
var
  isKnownMagic: Boolean;
begin
  isKnownMagic := (FHeader.Magic = Consts.LE_MAGIC) or (FHeader.Magic = Consts.BE_MAGIC);
  if not isKnownMagic then
    raise EGGIOError.CreateFmt(Consts.Str.S_NOT_VALID_FILE, [FFileName]);
end;
/// <summary>
/// Returns the key collection of the catalog (msgid / plural index pairs).
/// </summary>
function TGNUMOFileReader.GetKeys: TStringsPair.TKeyCollection;
begin
  Exit(FCatalog.Keys);
end;
/// <summary>
/// Looks up the translation of a non-plural message.
/// </summary>
/// <param name="MsgId">Original (untranslated) text.</param>
/// <returns>The translation, or MsgId itself when the catalog has no entry for it.</returns>
function TGNUMOFileReader.GetText(const MsgId: string): string;
var
  lookupKey: TPluralPair;
begin
  // non-plural entries are stored under the plural index -1
  lookupKey := TPluralPair.Create(MsgId, -1);
  if not FCatalog.TryGetValue(lookupKey, Result) then
    Result := MsgId;
end;
/// <summary>
/// Returns the value collection of the catalog (the translated strings).
/// </summary>
function TGNUMOFileReader.GetValues: TStringsPair.TValueCollection;
begin
  Exit(FCatalog.Values);
end;
/// <summary>
/// Looks up the translation of a message in the plural form that matches N.
/// </summary>
/// <param name="MsgId1">Original singular text; also the catalog key.</param>
/// <param name="MsgId2">Original plural text, used only as a fallback.</param>
/// <param name="N">Count that selects the plural form through PluralHandler.</param>
/// <returns>
/// The translated form when the catalog has it; otherwise MsgId1 when N = 1
/// and MsgId2 for any other N.
/// </returns>
function TGNUMOFileReader.NGetText(const MsgId1, MsgId2: string; N: Integer): string;
var
  lookupKey: TPluralPair;
begin
  lookupKey := TPluralPair.Create(MsgId1, FPluralHandler(N));
  if FCatalog.TryGetValue(lookupKey, Result) then
    Exit;

  // no translation available: fall back to the untranslated texts
  if N = 1 then
    Result := MsgId1
  else
    Result := MsgId2;
end;
/// <summary>
/// Loads the .mo file: reads it into memory, validates the header and fills
/// the metadata dictionary and the catalog. Results of a previous call are
/// discarded, so the method can be called again to reload the file.
/// </summary>
/// <exception cref="EGGIOError">Raised when the file is not a GNU .mo file.</exception>
/// <exception cref="EGNUGettext">
/// Raised when the file is big-endian or its revision is not supported.
/// </exception>
procedure TGNUMOFileReader.Parse;
begin
  // Start from an empty state so that parsing twice does not fail on
  // duplicate keys.
  FInfo.Clear;
  FCatalog.Clear;

  ReadBinaryFile();
  try
    ReadHeader();
    EnsureSignature();
    // A real check instead of Assert: assertions can be compiled out, which
    // would let a byte-swapped file be parsed as garbage.
    if FHeader.Magic = Consts.BE_MAGIC then
      raise EGNUGettext.Create('Big-endian is not supported.');
    EnsureRevision();
    ReadStrings();
  finally
    // the raw buffer is not needed once parsing has finished or failed
    SetLength(FBuffer, 0);
  end;
end;
/// <summary>
/// Copies the five header fields used by this reader out of the file buffer
/// into FHeader. Values are taken in host byte order; no byte swapping is done.
/// </summary>
/// <exception cref="EGGIOError">
/// Raised when the buffer is too short to contain the header fields.
/// </exception>
procedure TGNUMOFileReader.ReadHeader;
const
  // the last field read starts at TRANSLATION and is four bytes long
  HEADER_SIZE = Consts.Offset.TRANSLATION + SizeOf(Cardinal);
begin
  // An empty or truncated file cannot be a .mo file; fail before touching
  // memory outside the buffer.
  if Length(FBuffer) < HEADER_SIZE then
    raise EGGIOError.CreateFmt(Consts.Str.S_NOT_VALID_FILE, [FFileName]);

  FHeader.Magic := GetDW(FBuffer, Consts.Offset.SIGNATURE);
  FHeader.Revision := GetDW(FBuffer, Consts.Offset.VERSION);
  FHeader.NStrings := GetDW(FBuffer, Consts.Offset.MSG_COUNT);
  FHeader.OrigTabOffset := GetDW(FBuffer, Consts.Offset.TEXTS);
  FHeader.TransTabOffset := GetDW(FBuffer, Consts.Offset.TRANSLATION);
end;
/// <summary>
/// Reads the whole file FFileName into FBuffer and stores its size in FBufLen.
/// The file is opened read-only; other processes may read it at the same
/// time but may not write to it.
/// </summary>
procedure TGNUMOFileReader.ReadBinaryFile;
var
  stream: TFileStream;
begin
  // The share flag belongs to the Mode argument. Passed as a separate third
  // argument it is taken as Rights and the share mode is never applied.
  stream := TFileStream.Create(FFileName, fmOpenRead or fmShareDenyWrite);
  try
    FBufLen := stream.Size;
    SetLength(FBuffer, FBufLen);
    stream.ReadBuffer(FBuffer, FBufLen);
  finally
    stream.Free;
  end;
end;
/// <summary>
/// Parses the catalog metadata (the translation of the empty msgid) into
/// FInfo and picks up the strings encoding from the Content-Type header.
/// </summary>
/// <param name="ABuf">
/// Raw metadata bytes. They are decoded with DefaultEncoding and processed
/// line by line: "Name: value" lines become entries of FInfo (names are
/// stored in lower case), lines without a colon are appended to the previous
/// header, and "#-#-#-#-#" marker lines are skipped.
/// </param>
procedure TGNUMOFileReader.ReadMetadata(const ABuf: TBytes);
const
  CHARSET_TAG = 'charset=';
var
  msgs, line, lastk, k, v, charsetName: string;
  colonPos, charsetPos: Integer;
  newEncoding: TEncoding;
begin
  msgs := FDefaultEnc.GetString(ABuf);
  lastk := '';
  for var l in msgs.Split([#10]) do
  begin
    line := l.Trim();
    if line = '' then
      Continue;
    // skipping comments
    if line.StartsWith('#-#-#-#-#') and line.EndsWith('#-#-#-#-#') then
      Continue;

    // Reset per line: a continuation line must not be handled again as the
    // header that was parsed on an earlier line.
    k := '';
    v := '';

    colonPos := line.IndexOf(':');
    if colonPos > -1 then
    begin
      // Split at the first colon only, so values that contain colons
      // (dates, URLs) are kept whole.
      k := LowerCase(line.Substring(0, colonPos)).Trim();
      v := line.Substring(colonPos + 1).Trim();
      // a repeated header replaces the earlier value instead of raising
      FInfo.AddOrSetValue(k, v);
      lastk := k;
    end
    else if lastk <> '' then
      // continuation of the previous header: append on a new line
      FInfo[lastk] := FInfo[lastk] + #10 + line;

    if k = 'content-type' then
    begin
      charsetPos := v.IndexOf(CHARSET_TAG);
      if charsetPos > -1 then
      begin
        charsetName := v.Substring(charsetPos + Length(CHARSET_TAG)).Trim();
        if charsetName <> '' then
        begin
          // Create the new encoding first; release the one created by an
          // earlier call only after that succeeded.
          newEncoding := TEncoding.GetEncoding(charsetName);
          if FEncodingIsSet then
            FStringsEncoding.Free;
          FStringsEncoding := newEncoding;
          FEncodingIsSet := True;
        end;
      end;
    end;
    // The 'plural-forms' header is stored in FInfo like any other header but
    // is not evaluated; plural selection is done by PluralHandler.
  end;
end;
/// <summary>
/// Walks the original-strings and translated-strings tables of the file and
/// fills the catalog.
/// </summary>
/// <remarks>
/// Each table entry is a pair of 32-bit values: the string length and the
/// string offset. The entry with an empty msgid carries the metadata and is
/// passed to ReadMetadata before being stored. A msgid containing #0 is a
/// plural entry: the part before #0 is the key and every #0-separated part
/// of the translation is stored under its plural form index. All other
/// entries are stored under the plural index -1. Entries whose strings do
/// not fit inside the file are skipped; reading stops when a table entry
/// itself lies outside the file.
/// </remarks>
procedure TGNUMOFileReader.ReadStrings;
const
  ENTRY_SIZE = 8; // length (4 bytes) + offset (4 bytes)
var
  mlen, moff, tlen, toff, msgCount, textsOffset, transOffset: Cardinal;
  bufLen: UInt64;
  nulPos: Integer;
  MsgId, msg: TBytes;
  sMsgId: string;
  AMsgs: TArray<string>;
begin
  msgCount := FHeader.NStrings;
  textsOffset := FHeader.OrigTabOffset;
  transOffset := FHeader.TransTabOffset;
  bufLen := UInt64(FBufLen);

  // Counting from 1 keeps the loop empty for msgCount = 0; "msgCount - 1"
  // would wrap around for an unsigned zero.
  for var i: Cardinal := 1 to msgCount do
  begin
    // stop when the table entries themselves are outside the file
    if (UInt64(textsOffset) + ENTRY_SIZE > bufLen) or
      (UInt64(transOffset) + ENTRY_SIZE > bufLen) then
      Break;

    mlen := GetDW(FBuffer, textsOffset); // O - length of the original string
    moff := GetDW(FBuffer, textsOffset + 4); // offset of the original string
    tlen := GetDW(FBuffer, transOffset); // T - length of the translation
    toff := GetDW(FBuffer, transOffset + 4); // offset of the translation

    // 64-bit sums cannot wrap; "<" leaves room for the terminating NUL byte
    if (UInt64(moff) + mlen < bufLen) and (UInt64(toff) + tlen < bufLen) then
    begin
      MsgId := GetBuff(FBuffer, moff, mlen);
      msg := GetBuff(FBuffer, toff, tlen);
      // the empty msgid carries the metadata (and the strings charset)
      if mlen = 0 then
        ReadMetadata(msg);
      sMsgId := FStringsEncoding.GetString(MsgId);
      nulPos := sMsgId.IndexOf(#0);
      // if contains plural forms
      if nulPos > -1 then
      begin
        // the key is the singular msgid, i.e. the part before the first #0
        sMsgId := sMsgId.Substring(0, nulPos);
        // no limit on the number of parts: a language may have more than
        // two plural forms
        AMsgs := FStringsEncoding.GetString(msg).Split([#0]);
        for var j := 0 to High(AMsgs) do
          FCatalog.Add(TPluralPair.Create(sMsgId, j), AMsgs[j]);
      end
      else
        FCatalog.Add(TPluralPair.Create(sMsgId, -1), FStringsEncoding.GetString(msg));
    end;

    // Always advance to the next entry, also after a skipped one; otherwise
    // the same invalid entry would be read again on every iteration.
    Inc(textsOffset, ENTRY_SIZE);
    Inc(transOffset, ENTRY_SIZE);
  end;
end;
{ TGNUMOFileReader.TPluralPairComparer }

/// <summary>
/// Two catalog keys are equal when both the msgid and the plural form index match.
/// </summary>
function TGNUMOFileReader.TPluralPairComparer.Equals(const Left, Right: TPluralPair): Boolean;
begin
  // compare the integer part first; it is the cheaper test
  if Left.Value <> Right.Value then
    Exit(False);
  Result := Left.Key = Right.Key;
end;
/// <summary>
/// Hash of a catalog key. Only the msgid takes part in the hash, so all
/// plural forms of one msgid share the same hash code.
/// </summary>
function TGNUMOFileReader.TPluralPairComparer.GetHashCode(const Value: TPluralPair): Integer;
begin
  Exit(Value.Key.GetHashCode);
end;
end.