Statistics
| Branch: | Tag: | Revision:

root / Assets / Plugins / LitJson / Lexer.cs @ 11:01dde4258840

History | View | Annotate | Download (22.6 kB)

1
#region Header
2
/**
3
 * Lexer.cs
4
 *   JSON lexer implementation based on a finite state machine.
5
 *
6
 * The authors disclaim copyright to this source code. For more details, see
7
 * the COPYING file included with this distribution.
8
 **/
9
#endregion
10
11
12
using System;
13
using System.Collections.Generic;
14
using System.IO;
15
using System.Text;
16
17
18
namespace LitJson
19
{
20
    // Mutable scratch record passed to every state handler. The lexer
    // creates one instance and reuses it for all transitions.
    internal class FsmContext
    {
        // Lexer that owns this context; handlers reach the input and the
        // string buffer through it.
        public Lexer L;

        // 1-based number of the state the lexer should enter next.
        public int   NextState;

        // Set to true by a handler when the state it just ran completes
        // a token.
        public bool  Return;

        // State to resume once an escape sequence has been consumed
        // (written by the string states, read by the escape states).
        public int   StateStack;
    }
27
28
29
    internal class Lexer
30
    {
31
        #region Fields
        // Signature shared by the State1..State28 handlers. A handler
        // returns false when the character it read is not valid in that
        // state.
        private delegate bool StateHandler (FsmContext ctx);

        // Both tables are indexed by (state - 1) and are filled in by
        // PopulateFsmTables, which the static constructor calls.
        private static int[]          fsm_return_table;
        private static StateHandler[] fsm_handler_table;

        // Option flags exposed through the public properties.
        private bool                   allow_comments;
        private bool                   allow_single_quoted_strings;

        // Becomes true once the reader reports end of stream (-1).
        private bool                   end_of_input;

        // Assigned once in the constructor and never replaced, hence
        // readonly; the objects themselves stay mutable.
        private readonly FsmContext    fsm_context;
        private readonly TextReader    reader;
        private readonly StringBuilder string_buffer;

        // One-character push-back slot; 0 means "empty".
        private int                    input_buffer;

        // Most recently read character, or -1 at end of input.
        private int                    input_char;

        // Current 1-based state of the finite state machine.
        private int                    state;

        // Text and token code of the last token produced by NextToken.
        private string                 string_value;
        private int                    token;

        // Accumulator for the code unit of a \uXXXX escape.
        private int                    unichar;
        #endregion
50
51
52
        #region Properties
        // Whether '/' may start a comment (see State1 and State25..State28).
        public bool AllowComments
        {
            get {
                return allow_comments;
            }
            set {
                allow_comments = value;
            }
        }

        // Whether a string may be delimited by single quotes (see State1).
        public bool AllowSingleQuotedStrings
        {
            get {
                return allow_single_quoted_strings;
            }
            set {
                allow_single_quoted_strings = value;
            }
        }

        // True once the underlying reader has returned -1.
        public bool EndOfInput
        {
            get {
                return end_of_input;
            }
        }

        // Code of the last token produced by NextToken: either a
        // ParserToken value or, for single-character tokens, the
        // character itself.
        public int Token
        {
            get {
                return token;
            }
        }

        // Text accumulated for the last token produced by NextToken.
        public string StringValue
        {
            get {
                return string_value;
            }
        }
        #endregion
75
76
77
        #region Constructors
78
        // Type initializer: builds the shared handler and return tables
        // before the first Lexer is used.
        static Lexer ()
        {
            Lexer.PopulateFsmTables ();
        }
82
83
        // Creates a lexer that pulls characters from the given reader.
        // Comments and single-quoted strings are both enabled by default,
        // and the machine starts in state 1 with an empty push-back slot.
        public Lexer (TextReader reader)
        {
            this.reader = reader;

            // Option defaults.
            allow_single_quoted_strings = true;
            allow_comments = true;

            // Initial machine state.
            end_of_input = false;
            state = 1;
            input_buffer = 0;
            string_buffer = new StringBuilder (128);

            // The context keeps a back-reference so that the static
            // handlers can reach this instance.
            FsmContext context = new FsmContext ();
            context.L = this;
            fsm_context = context;
        }
97
        #endregion
98
99
100
        #region Static Methods
101
        // Converts a hexadecimal digit character to its numeric value.
        // 'a'..'f' and 'A'..'F' map to 10..15; any other input is treated
        // as a decimal digit and yields (digit - '0') with no validation,
        // so callers are expected to pass hex digits only.
        private static int HexValue (int digit)
        {
            if (digit >= 'a' && digit <= 'f')
                return 10 + (digit - 'a');

            if (digit >= 'A' && digit <= 'F')
                return 10 + (digit - 'A');

            return digit - '0';
        }
132
133
        // Builds the two static tables that drive the state machine.
        // Both are indexed by (state - 1).
        private static void PopulateFsmTables ()
        {
            // See section A.1. of the manual for details of the finite
            // state machine.
            StateHandler[] handlers = new StateHandler[] {
                State1,  State2,  State3,  State4,
                State5,  State6,  State7,  State8,
                State9,  State10, State11, State12,
                State13, State14, State15, State16,
                State17, State18, State19, State20,
                State21, State22, State23, State24,
                State25, State26, State27, State28
            };

            // Token reported when a state sets FsmContext.Return. States
            // that are not listed below keep the default value 0.
            int[] returns = new int[handlers.Length];

            returns[1  - 1] = (int) ParserToken.Char;
            returns[3  - 1] = (int) ParserToken.Number;
            returns[4  - 1] = (int) ParserToken.Number;
            returns[6  - 1] = (int) ParserToken.Number;
            returns[8  - 1] = (int) ParserToken.Number;
            returns[11 - 1] = (int) ParserToken.True;
            returns[15 - 1] = (int) ParserToken.False;
            returns[18 - 1] = (int) ParserToken.Null;
            returns[19 - 1] = (int) ParserToken.CharSeq;
            returns[20 - 1] = (int) ParserToken.Char;
            returns[23 - 1] = (int) ParserToken.CharSeq;
            returns[24 - 1] = (int) ParserToken.Char;

            fsm_handler_table = handlers;
            fsm_return_table = returns;
        }
199
200
        // Maps the character that follows a backslash to the character it
        // stands for. Quotes, backslash and slash map to themselves; the
        // letters n, t, r, b and f map to their control characters. Any
        // other input yields '?', which State21 never triggers because it
        // filters the escape character first.
        private static char ProcessEscChar (int esc_char)
        {
            switch (esc_char) {
            case 'b':
                return '\b';

            case 'f':
                return '\f';

            case 'n':
                return '\n';

            case 'r':
                return '\r';

            case 't':
                return '\t';

            case '/':
            case '\\':
            case '\'':
            case '"':
                // These escapes denote the character itself.
                return (char) esc_char;
            }

            // Unreachable
            return '?';
        }
229
230
        // State 1: start of a token. Skips blanks, then dispatches on the
        // first significant character. Structural characters and opening
        // quotes are reported immediately as single-character tokens;
        // everything else just selects the follow-up state.
        private static bool State1 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                int c = lexer.input_char;

                // Blanks: space plus the '\t'..'\r' control range.
                bool blank = c == ' ' || (c >= '\t' && c <= '\r');
                if (blank)
                    continue;

                switch (c) {
                case '{':
                case '}':
                case '[':
                case ']':
                case ',':
                case ':':
                    ctx.Return = true;
                    ctx.NextState = 1;
                    return true;

                case '"':
                    ctx.Return = true;
                    ctx.NextState = 19;
                    return true;

                case '\'':
                    if (! lexer.allow_single_quoted_strings)
                        return false;

                    // Report the opening quote as a double quote.
                    lexer.input_char = '"';
                    ctx.Return = true;
                    ctx.NextState = 23;
                    return true;

                case '/':
                    if (! lexer.allow_comments)
                        return false;

                    ctx.NextState = 25;
                    return true;

                case 't':
                    ctx.NextState = 9;
                    return true;

                case 'f':
                    ctx.NextState = 12;
                    return true;

                case 'n':
                    ctx.NextState = 16;
                    return true;

                case '-':
                    lexer.string_buffer.Append ((char) c);
                    ctx.NextState = 2;
                    return true;

                case '0':
                    lexer.string_buffer.Append ((char) c);
                    ctx.NextState = 4;
                    return true;

                default:
                    // '1'..'9' starts the integer part of a number.
                    if (c >= '1' && c <= '9') {
                        lexer.string_buffer.Append ((char) c);
                        ctx.NextState = 3;
                        return true;
                    }

                    return false;
                }
            }

            // Input ran out while skipping blanks.
            return true;
        }
304
305
        // State 2: a leading '-' has been buffered; a digit must follow.
        // '0' goes to state 4, '1'..'9' to state 3. At end of input
        // input_char is -1 and is rejected like any other non-digit.
        private static bool State2 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.GetChar ();
            int c = lexer.input_char;

            if (c < '0' || c > '9')
                return false;

            lexer.string_buffer.Append ((char) c);
            ctx.NextState = (c == '0') ? 4 : 3;
            return true;
        }
325
326
        // State 3: inside the integer part of a number that started with
        // '1'..'9'. Collects digits until a terminator, a '.', or an
        // exponent marker is seen.
        private static bool State3 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                int c = lexer.input_char;

                if (c >= '0' && c <= '9') {
                    lexer.string_buffer.Append ((char) c);
                    continue;
                }

                if (c == '.' || c == 'e' || c == 'E') {
                    lexer.string_buffer.Append ((char) c);
                    ctx.NextState = (c == '.') ? 5 : 7;
                    return true;
                }

                bool blank = c == ' ' || (c >= '\t' && c <= '\r');
                bool closer = c == ',' || c == ']' || c == '}';

                if (! blank && ! closer)
                    return false;

                // A structural character belongs to the next token, so it
                // is pushed back; blanks are simply dropped.
                if (closer)
                    lexer.UngetChar ();

                ctx.Return = true;
                ctx.NextState = 1;
                return true;
            }

            return true;
        }
367
368
        // State 4: a leading '0' has been buffered. No further integer
        // digits are allowed; only a terminator, '.', or an exponent
        // marker may follow. At end of input input_char is -1 and the
        // handler reports failure.
        private static bool State4 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.GetChar ();
            int c = lexer.input_char;

            if (c == '.' || c == 'e' || c == 'E') {
                lexer.string_buffer.Append ((char) c);
                ctx.NextState = (c == '.') ? 5 : 7;
                return true;
            }

            bool blank = c == ' ' || (c >= '\t' && c <= '\r');
            bool closer = c == ',' || c == ']' || c == '}';

            if (! blank && ! closer)
                return false;

            // Structural characters are pushed back for the next token.
            if (closer)
                lexer.UngetChar ();

            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }
403
404
        // State 5: a '.' has been buffered; at least one fraction digit
        // must follow.
        private static bool State5 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.GetChar ();
            int c = lexer.input_char;

            if (c < '0' || c > '9')
                return false;

            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 6;
            return true;
        }
416
417
        // State 6: inside the fraction digits of a number. Collects
        // digits until a terminator or an exponent marker is seen.
        private static bool State6 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                int c = lexer.input_char;

                if (c >= '0' && c <= '9') {
                    lexer.string_buffer.Append ((char) c);
                    continue;
                }

                if (c == 'e' || c == 'E') {
                    lexer.string_buffer.Append ((char) c);
                    ctx.NextState = 7;
                    return true;
                }

                bool blank = c == ' ' || (c >= '\t' && c <= '\r');
                bool closer = c == ',' || c == ']' || c == '}';

                if (! blank && ! closer)
                    return false;

                // Structural characters are pushed back for the next token.
                if (closer)
                    lexer.UngetChar ();

                ctx.Return = true;
                ctx.NextState = 1;
                return true;
            }

            return true;
        }
454
455
        // State 7: an exponent marker ('e' or 'E') has been buffered. The
        // next character must be a digit or a sign; either one is
        // buffered and control moves to state 8.
        private static bool State7 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.GetChar ();
            int c = lexer.input_char;

            bool digit = c >= '0' && c <= '9';
            bool sign = c == '+' || c == '-';

            if (! digit && ! sign)
                return false;

            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 8;
            return true;
        }
476
477
        // State 8: inside the exponent of a number. Collects digits until
        // a blank or a structural character (',', ']', '}') ends the
        // number.
        //
        // State 7 enters this state after buffering either a digit or a
        // bare sign. The JSON grammar requires at least one digit after
        // the optional sign, so a terminator that arrives while the last
        // buffered character is still '+' or '-' is rejected instead of
        // emitting a malformed number such as "1e+".
        //
        // Returns false on an invalid character; otherwise true, with
        // ctx.Return set when a complete number has been recognized.
        private static bool State8 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;
            StringBuilder buffer = lexer.string_buffer;

            while (lexer.GetChar ()) {
                int c = lexer.input_char;

                if (c >= '0' && c <= '9') {
                    buffer.Append ((char) c);
                    continue;
                }

                bool blank = c == ' ' || (c >= '\t' && c <= '\r');
                bool closer = c == ',' || c == ']' || c == '}';

                if (! blank && ! closer)
                    return false;

                // Exponent consisted of a sign only: not a valid number.
                if (buffer.Length > 0) {
                    char last = buffer[buffer.Length - 1];

                    if (last == '+' || last == '-')
                        return false;
                }

                // Structural characters are pushed back for the next token.
                if (closer)
                    lexer.UngetChar ();

                ctx.Return = true;
                ctx.NextState = 1;
                return true;
            }

            return true;
        }
508
509
        // State 9: 't' has been read; expects the 'r' of "true".
        private static bool State9 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'r')
                return false;

            ctx.NextState = 10;
            return true;
        }
522
523
        // State 10: "tr" has been read; expects the 'u' of "true".
        private static bool State10 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'u')
                return false;

            ctx.NextState = 11;
            return true;
        }
536
537
        // State 11: "tru" has been read; the final 'e' completes the
        // literal and a token is reported.
        private static bool State11 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'e')
                return false;

            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }
551
552
        // State 12: 'f' has been read; expects the 'a' of "false".
        private static bool State12 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'a')
                return false;

            ctx.NextState = 13;
            return true;
        }
565
566
        // State 13: "fa" has been read; expects the 'l' of "false".
        private static bool State13 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'l')
                return false;

            ctx.NextState = 14;
            return true;
        }
579
580
        // State 14: "fal" has been read; expects the 's' of "false".
        private static bool State14 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 's')
                return false;

            ctx.NextState = 15;
            return true;
        }
593
594
        // State 15: "fals" has been read; the final 'e' completes the
        // literal and a token is reported.
        private static bool State15 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'e')
                return false;

            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }
608
609
        // State 16: 'n' has been read; expects the 'u' of "null".
        private static bool State16 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'u')
                return false;

            ctx.NextState = 17;
            return true;
        }
622
623
        // State 17: "nu" has been read; expects the first 'l' of "null".
        private static bool State17 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'l')
                return false;

            ctx.NextState = 18;
            return true;
        }
636
637
        // State 18: "nul" has been read; the second 'l' completes the
        // literal and a token is reported.
        private static bool State18 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != 'l')
                return false;

            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }
651
652
        // State 19: body of a double-quoted string. Characters are
        // buffered until the closing quote, which is pushed back so that
        // state 20 can report it as its own token. A backslash hands
        // over to state 21 and records 19 as the state to come back to.
        private static bool State19 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                int c = lexer.input_char;

                if (c == '"') {
                    lexer.UngetChar ();
                    ctx.Return = true;
                    ctx.NextState = 20;
                    return true;
                }

                if (c == '\\') {
                    ctx.StateStack = 19;
                    ctx.NextState = 21;
                    return true;
                }

                lexer.string_buffer.Append ((char) c);
            }

            return true;
        }
675
676
        // State 20: consumes the closing double quote that state 19
        // pushed back and reports it as a token.
        private static bool State20 (FsmContext ctx)
        {
            ctx.L.GetChar ();

            if (ctx.L.input_char != '"')
                return false;

            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }
690
691
        // State 21: a backslash has been read inside a string. 'u' starts
        // a four-digit hex escape (state 22); the simple escapes are
        // translated and buffered, after which the string state saved in
        // ctx.StateStack resumes. Any other character is an error.
        private static bool State21 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.GetChar ();
            int c = lexer.input_char;

            if (c == 'u') {
                ctx.NextState = 22;
                return true;
            }

            bool simple_escape =
                c == '"'  || c == '\'' || c == '/' || c == '\\' ||
                c == 'b'  || c == 'f'  || c == 'n' || c == 'r'  ||
                c == 't';

            if (! simple_escape)
                return false;

            lexer.string_buffer.Append (ProcessEscChar (c));
            ctx.NextState = ctx.StateStack;
            return true;
        }
718
719
        // State 22: reads the four hex digits of a \uXXXX escape, most
        // significant digit first, accumulating the value in the lexer's
        // unichar field. After the fourth digit the resulting character
        // is buffered and the saved string state resumes. A non-hex
        // character is an error; running out of input returns true and
        // leaves end-of-input handling to the caller.
        private static bool State22 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.unichar = 0;

            // Digit weights are 16^3, 16^2, 16^1, 16^0, i.e. shifts of
            // 12, 8, 4 and 0 bits.
            for (int shift = 12; shift >= 0; shift -= 4) {
                if (! lexer.GetChar ())
                    return true;

                int c = lexer.input_char;

                bool hex_digit =
                    (c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f');

                if (! hex_digit)
                    return false;

                lexer.unichar += HexValue (c) << shift;
            }

            lexer.string_buffer.Append (Convert.ToChar (lexer.unichar));
            ctx.NextState = ctx.StateStack;
            return true;
        }
752
753
        // State 23: body of a single-quoted string. Mirrors state 19,
        // but ends on a single quote (pushed back for state 24) and
        // records 23 as the state to resume after an escape.
        private static bool State23 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                int c = lexer.input_char;

                if (c == '\'') {
                    lexer.UngetChar ();
                    ctx.Return = true;
                    ctx.NextState = 24;
                    return true;
                }

                if (c == '\\') {
                    ctx.StateStack = 23;
                    ctx.NextState = 21;
                    return true;
                }

                lexer.string_buffer.Append ((char) c);
            }

            return true;
        }
776
777
        // State 24: consumes the closing single quote that state 23
        // pushed back. The character is rewritten to a double quote
        // before the token is reported, matching what state 1 does for
        // the opening quote.
        private static bool State24 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            lexer.GetChar ();

            if (lexer.input_char != '\'')
                return false;

            lexer.input_char = '"';
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }
792
793
        // State 25: a '/' has been read. A second '/' starts a line
        // comment (state 26) and a '*' starts a block comment (state 27);
        // anything else is an error.
        private static bool State25 (FsmContext ctx)
        {
            ctx.L.GetChar ();
            int c = ctx.L.input_char;

            if (c == '/')
                ctx.NextState = 26;
            else if (c == '*')
                ctx.NextState = 27;
            else
                return false;

            return true;
        }
810
811
        // State 26: inside a line comment. Discards characters up to and
        // including the next '\n', then returns to state 1.
        private static bool State26 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                if (lexer.input_char != '\n')
                    continue;

                ctx.NextState = 1;
                return true;
            }

            return true;
        }
822
823
        // State 27: inside a block comment. Discards characters until a
        // '*' is seen, then lets state 28 check whether it closes the
        // comment.
        private static bool State27 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            while (lexer.GetChar ()) {
                if (lexer.input_char != '*')
                    continue;

                ctx.NextState = 28;
                return true;
            }

            return true;
        }
834
835
        // State 28: a '*' has been read inside a block comment. Further
        // '*' characters are skipped; a '/' then closes the comment
        // (back to state 1), while any other character resumes the
        // comment body (state 27).
        private static bool State28 (FsmContext ctx)
        {
            Lexer lexer = ctx.L;

            do {
                if (! lexer.GetChar ())
                    return true;
            } while (lexer.input_char == '*');

            ctx.NextState = (lexer.input_char == '/') ? 1 : 27;
            return true;
        }
852
        #endregion
853
854
855
        // Reads the next character into input_char. Returns false, and
        // latches end_of_input, when the source yields -1.
        private bool GetChar ()
        {
            input_char = NextChar ();

            if (input_char == -1) {
                end_of_input = true;
                return false;
            }

            return true;
        }
863
864
        // Returns the pushed-back character if there is one (clearing the
        // slot), otherwise the next character from the reader. A slot
        // value of 0 means nothing is pending.
        private int NextChar ()
        {
            if (input_buffer == 0)
                return reader.Read ();

            int pending = input_buffer;
            input_buffer = 0;
            return pending;
        }
875
876
        // Runs the state machine until a token is complete.
        //
        // Returns true when a token is available through Token and
        // StringValue, and false when the end of the input was reached.
        // Throws JsonException, carrying the offending character, when a
        // state handler rejects the input.
        public bool NextToken ()
        {
            fsm_context.Return = false;

            for (;;) {
                StateHandler run_state = fsm_handler_table[state - 1];

                if (! run_state (fsm_context))
                    throw new JsonException (input_char);

                if (end_of_input)
                    return false;

                // Remember which state just ran before moving on; its
                // entry in the return table names the token.
                int finished = state;
                state = fsm_context.NextState;

                if (! fsm_context.Return)
                    continue;

                string_value = string_buffer.ToString ();
                string_buffer.Length = 0;

                token = fsm_return_table[finished - 1];

                // Single-character tokens are reported as the character
                // itself.
                if (token == (int) ParserToken.Char)
                    token = input_char;

                return true;
            }
        }
906
907
        // Pushes the current character back so that the next NextChar
        // call returns it again. Only one character can be pending.
        private void UngetChar ()
        {
            this.input_buffer = this.input_char;
        }
911
    }
912
}