1
2
3
4
5 package bidi
6
7 import (
8 "fmt"
9 "log"
10 )
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 type level int8
57
58 const implicitLevel level = -1
59
60
61 func (c Class) in(set ...Class) bool {
62 for _, s := range set {
63 if c == s {
64 return true
65 }
66 }
67 return false
68 }
69
70
71 type paragraph struct {
72 initialTypes []Class
73
74
75 pairTypes []bracketType
76 pairValues []rune
77
78 embeddingLevel level
79
80
81 resultTypes []Class
82 resultLevels []level
83
84
85
86
87
88 matchingPDI []int
89
90
91
92
93 matchingIsolateInitiator []int
94 }
95
96
97
98
99
100
101
102
103 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) {
104 var err error
105 if err = validateTypes(types); err != nil {
106 return nil, err
107 }
108 if err = validatePbTypes(pairTypes); err != nil {
109 return nil, err
110 }
111 if err = validatePbValues(pairValues, pairTypes); err != nil {
112 return nil, err
113 }
114 if err = validateParagraphEmbeddingLevel(levels); err != nil {
115 return nil, err
116 }
117
118 p := ¶graph{
119 initialTypes: append([]Class(nil), types...),
120 embeddingLevel: levels,
121
122 pairTypes: pairTypes,
123 pairValues: pairValues,
124
125 resultTypes: append([]Class(nil), types...),
126 }
127 p.run()
128 return p, nil
129 }
130
131 func (p *paragraph) Len() int { return len(p.initialTypes) }
132
133
134
135 func (p *paragraph) run() {
136 p.determineMatchingIsolates()
137
138
139
140
141
142 if p.embeddingLevel == implicitLevel {
143 p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
144 }
145
146
147 p.resultLevels = make([]level, p.Len())
148 setLevels(p.resultLevels, p.embeddingLevel)
149
150
151
152 p.determineExplicitEmbeddingLevels()
153
154
155
156
157
158
159
160
161
162 for _, seq := range p.determineIsolatingRunSequences() {
163
164
165 seq.resolveWeakTypes()
166
167
168
169 resolvePairedBrackets(seq)
170
171
172
173 seq.resolveNeutralTypes()
174
175
176
177 seq.resolveImplicitLevels()
178
179
180 seq.applyLevelsAndTypes()
181 }
182
183
184
185
186 p.assignLevelsToCharactersRemovedByX9()
187 }
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204 func (p *paragraph) determineMatchingIsolates() {
205 p.matchingPDI = make([]int, p.Len())
206 p.matchingIsolateInitiator = make([]int, p.Len())
207
208 for i := range p.matchingIsolateInitiator {
209 p.matchingIsolateInitiator[i] = -1
210 }
211
212 for i := range p.matchingPDI {
213 p.matchingPDI[i] = -1
214
215 if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
216 depthCounter := 1
217 for j := i + 1; j < p.Len(); j++ {
218 if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
219 depthCounter++
220 } else if u == PDI {
221 if depthCounter--; depthCounter == 0 {
222 p.matchingPDI[i] = j
223 p.matchingIsolateInitiator[j] = i
224 break
225 }
226 }
227 }
228 if p.matchingPDI[i] == -1 {
229 p.matchingPDI[i] = p.Len()
230 }
231 }
232 }
233 }
234
235
236
237
238
239
240 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
241 var strongType Class = unknownClass
242
243
244 for i := start; i < end; i++ {
245 if t := p.resultTypes[i]; t.in(L, AL, R) {
246 strongType = t
247 break
248 } else if t.in(FSI, LRI, RLI) {
249 i = p.matchingPDI[i]
250 if i > end {
251 log.Panic("assert (i <= end)")
252 }
253 }
254 }
255
256 switch strongType {
257 case unknownClass:
258
259 return 0
260 case L:
261 return 0
262 default:
263 return 1
264 }
265 }
266
267 const maxDepth = 125
268
269
270
271 type directionalStatusStack struct {
272 stackCounter int
273 embeddingLevelStack [maxDepth + 1]level
274 overrideStatusStack [maxDepth + 1]Class
275 isolateStatusStack [maxDepth + 1]bool
276 }
277
278 func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
279 func (s *directionalStatusStack) pop() { s.stackCounter-- }
280 func (s *directionalStatusStack) depth() int { return s.stackCounter }
281
282 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
283 s.embeddingLevelStack[s.stackCounter] = level
284 s.overrideStatusStack[s.stackCounter] = overrideStatus
285 s.isolateStatusStack[s.stackCounter] = isolateStatus
286 s.stackCounter++
287 }
288
289 func (s *directionalStatusStack) lastEmbeddingLevel() level {
290 return s.embeddingLevelStack[s.stackCounter-1]
291 }
292
293 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
294 return s.overrideStatusStack[s.stackCounter-1]
295 }
296
297 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
298 return s.isolateStatusStack[s.stackCounter-1]
299 }
300
301
302 func (p *paragraph) determineExplicitEmbeddingLevels() {
303 var stack directionalStatusStack
304 var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
305
306
307 stack.push(p.embeddingLevel, ON, false)
308
309 for i, t := range p.resultTypes {
310
311 switch t {
312 case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
313 isIsolate := t.in(RLI, LRI, FSI)
314 isRTL := t.in(RLE, RLO, RLI)
315
316
317 if t == FSI {
318 isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
319 }
320 if isIsolate {
321 p.resultLevels[i] = stack.lastEmbeddingLevel()
322 if stack.lastDirectionalOverrideStatus() != ON {
323 p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
324 }
325 }
326
327 var newLevel level
328 if isRTL {
329
330 newLevel = (stack.lastEmbeddingLevel() + 1) | 1
331 } else {
332
333 newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
334 }
335
336 if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
337 if isIsolate {
338 validIsolateCount++
339 }
340
341
342
343
344 switch t {
345 case LRO:
346 stack.push(newLevel, L, isIsolate)
347 case RLO:
348 stack.push(newLevel, R, isIsolate)
349 default:
350 stack.push(newLevel, ON, isIsolate)
351 }
352
353 if !isIsolate {
354 p.resultLevels[i] = newLevel
355 }
356 } else {
357
358
359 if isIsolate {
360 overflowIsolateCount++
361 } else {
362 if overflowIsolateCount == 0 {
363 overflowEmbeddingCount++
364 }
365 }
366 }
367
368
369 case PDI:
370 if overflowIsolateCount > 0 {
371 overflowIsolateCount--
372 } else if validIsolateCount == 0 {
373
374 } else {
375 overflowEmbeddingCount = 0
376 for !stack.lastDirectionalIsolateStatus() {
377 stack.pop()
378 }
379 stack.pop()
380 validIsolateCount--
381 }
382 p.resultLevels[i] = stack.lastEmbeddingLevel()
383
384
385 case PDF:
386
387 p.resultLevels[i] = stack.lastEmbeddingLevel()
388
389 if overflowIsolateCount > 0 {
390
391 } else if overflowEmbeddingCount > 0 {
392 overflowEmbeddingCount--
393 } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
394 stack.pop()
395 }
396
397 case B:
398
399
400
401
402 stack.empty()
403 overflowIsolateCount = 0
404 overflowEmbeddingCount = 0
405 validIsolateCount = 0
406 p.resultLevels[i] = p.embeddingLevel
407
408 default:
409 p.resultLevels[i] = stack.lastEmbeddingLevel()
410 if stack.lastDirectionalOverrideStatus() != ON {
411 p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
412 }
413 }
414 }
415 }
416
417 type isolatingRunSequence struct {
418 p *paragraph
419
420 indexes []int
421
422 types []Class
423 resolvedLevels []level
424 level level
425 sos, eos Class
426 }
427
428 func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
429
430
431
432 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
433 length := len(indexes)
434 types := make([]Class, length)
435 for i, x := range indexes {
436 types[i] = p.resultTypes[x]
437 }
438
439
440 prevChar := indexes[0] - 1
441 for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
442 prevChar--
443 }
444 prevLevel := p.embeddingLevel
445 if prevChar >= 0 {
446 prevLevel = p.resultLevels[prevChar]
447 }
448
449 var succLevel level
450 lastType := types[length-1]
451 if lastType.in(LRI, RLI, FSI) {
452 succLevel = p.embeddingLevel
453 } else {
454
455 limit := indexes[length-1] + 1
456 for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
457
458 }
459 succLevel = p.embeddingLevel
460 if limit < p.Len() {
461 succLevel = p.resultLevels[limit]
462 }
463 }
464 level := p.resultLevels[indexes[0]]
465 return &isolatingRunSequence{
466 p: p,
467 indexes: indexes,
468 types: types,
469 level: level,
470 sos: typeForLevel(max(prevLevel, level)),
471 eos: typeForLevel(max(succLevel, level)),
472 }
473 }
474
475
476
477
478
479 func (s *isolatingRunSequence) resolveWeakTypes() {
480
481
482 s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
483
484
485
486 precedingCharacterType := s.sos
487 for i, t := range s.types {
488 if t == NSM {
489 s.types[i] = precedingCharacterType
490 } else {
491
492
493
494 precedingCharacterType = t
495 }
496 }
497
498
499
500 for i, t := range s.types {
501 if t == EN {
502 for j := i - 1; j >= 0; j-- {
503 if t := s.types[j]; t.in(L, R, AL) {
504 if t == AL {
505 s.types[i] = AN
506 }
507 break
508 }
509 }
510 }
511 }
512
513
514 for i, t := range s.types {
515 if t == AL {
516 s.types[i] = R
517 }
518 }
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533 for i := 1; i < s.Len()-1; i++ {
534 t := s.types[i]
535 if t == ES || t == CS {
536 prevSepType := s.types[i-1]
537 succSepType := s.types[i+1]
538 if prevSepType == EN && succSepType == EN {
539 s.types[i] = EN
540 } else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
541 s.types[i] = AN
542 }
543 }
544 }
545
546
547 for i, t := range s.types {
548 if t == ET {
549
550 runStart := i
551 runEnd := s.findRunLimit(runStart, ET)
552
553
554 t := s.sos
555 if runStart > 0 {
556 t = s.types[runStart-1]
557 }
558 if t != EN {
559 t = s.eos
560 if runEnd < len(s.types) {
561 t = s.types[runEnd]
562 }
563 }
564 if t == EN {
565 setTypes(s.types[runStart:runEnd], EN)
566 }
567
568 i = runEnd
569 }
570 }
571
572
573 for i, t := range s.types {
574 if t.in(ES, ET, CS) {
575 s.types[i] = ON
576 }
577 }
578
579
580 for i, t := range s.types {
581 if t == EN {
582
583 prevStrongType := s.sos
584 for j := i - 1; j >= 0; j-- {
585 t = s.types[j]
586 if t == L || t == R {
587 prevStrongType = t
588 break
589 }
590 }
591 if prevStrongType == L {
592 s.types[i] = L
593 }
594 }
595 }
596 }
597
598
599 func (s *isolatingRunSequence) resolveNeutralTypes() {
600
601
602 s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
603
604 for i, t := range s.types {
605 switch t {
606 case WS, ON, B, S, RLI, LRI, FSI, PDI:
607
608 runStart := i
609 runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
610
611
612 var leadType, trailType Class
613
614
615
616 if runStart == 0 {
617 leadType = s.sos
618 } else {
619 leadType = s.types[runStart-1]
620 if leadType.in(AN, EN) {
621 leadType = R
622 }
623 }
624 if runEnd == len(s.types) {
625 trailType = s.eos
626 } else {
627 trailType = s.types[runEnd]
628 if trailType.in(AN, EN) {
629 trailType = R
630 }
631 }
632
633 var resolvedType Class
634 if leadType == trailType {
635
636 resolvedType = leadType
637 } else {
638
639
640
641 resolvedType = typeForLevel(s.level)
642 }
643
644 setTypes(s.types[runStart:runEnd], resolvedType)
645
646
647 i = runEnd
648 }
649 }
650 }
651
652 func setLevels(levels []level, newLevel level) {
653 for i := range levels {
654 levels[i] = newLevel
655 }
656 }
657
658 func setTypes(types []Class, newType Class) {
659 for i := range types {
660 types[i] = newType
661 }
662 }
663
664
665 func (s *isolatingRunSequence) resolveImplicitLevels() {
666
667
668 s.assertOnly(L, R, EN, AN)
669
670 s.resolvedLevels = make([]level, len(s.types))
671 setLevels(s.resolvedLevels, s.level)
672
673 if (s.level & 1) == 0 {
674 for i, t := range s.types {
675
676 if t == L {
677
678 } else if t == R {
679 s.resolvedLevels[i] += 1
680 } else {
681 s.resolvedLevels[i] += 2
682 }
683 }
684 } else {
685 for i, t := range s.types {
686
687 if t == R {
688
689 } else {
690 s.resolvedLevels[i] += 1
691 }
692 }
693 }
694 }
695
696
697
698 func (s *isolatingRunSequence) applyLevelsAndTypes() {
699 for i, x := range s.indexes {
700 s.p.resultTypes[x] = s.types[i]
701 s.p.resultLevels[x] = s.resolvedLevels[i]
702 }
703 }
704
705
706
707
708 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
709 loop:
710 for ; index < len(s.types); index++ {
711 t := s.types[index]
712 for _, valid := range validSet {
713 if t == valid {
714 continue loop
715 }
716 }
717 return index
718 }
719 return len(s.types)
720 }
721
722
723
724 func (s *isolatingRunSequence) assertOnly(codes ...Class) {
725 loop:
726 for i, t := range s.types {
727 for _, c := range codes {
728 if t == c {
729 continue loop
730 }
731 }
732 log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
733 }
734 }
735
736
737
738
739
740
741
742 func (p *paragraph) determineLevelRuns() [][]int {
743 run := []int{}
744 allRuns := [][]int{}
745 currentLevel := implicitLevel
746
747 for i := range p.initialTypes {
748 if !isRemovedByX9(p.initialTypes[i]) {
749 if p.resultLevels[i] != currentLevel {
750
751 if currentLevel >= 0 {
752 allRuns = append(allRuns, run)
753 run = nil
754 }
755
756 currentLevel = p.resultLevels[i]
757 }
758 run = append(run, i)
759 }
760 }
761
762 if len(run) > 0 {
763 allRuns = append(allRuns, run)
764 }
765 return allRuns
766 }
767
768
769 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
770 levelRuns := p.determineLevelRuns()
771
772
773 runForCharacter := make([]int, p.Len())
774 for i, run := range levelRuns {
775 for _, index := range run {
776 runForCharacter[index] = i
777 }
778 }
779
780 sequences := []*isolatingRunSequence{}
781
782 var currentRunSequence []int
783
784 for _, run := range levelRuns {
785 first := run[0]
786 if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
787 currentRunSequence = nil
788
789 for {
790
791 currentRunSequence = append(currentRunSequence, run...)
792
793 last := currentRunSequence[len(currentRunSequence)-1]
794 lastT := p.initialTypes[last]
795 if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
796 run = levelRuns[runForCharacter[p.matchingPDI[last]]]
797 } else {
798 break
799 }
800 }
801 sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
802 }
803 }
804 return sequences
805 }
806
807
808
809
810
811 func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
812 for i, t := range p.initialTypes {
813 if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
814 p.resultTypes[i] = t
815 p.resultLevels[i] = -1
816 }
817 }
818
819
820
821
822 if p.resultLevels[0] == -1 {
823 p.resultLevels[0] = p.embeddingLevel
824 }
825 for i := 1; i < len(p.initialTypes); i++ {
826 if p.resultLevels[i] == -1 {
827 p.resultLevels[i] = p.resultLevels[i-1]
828 }
829 }
830
831
832 }
833
834
835
836
837
838
839
840
841
842
843
844 func (p *paragraph) getLevels(linebreaks []int) []level {
845
846
847
848
849
850
851
852
853
854
855
856 validateLineBreaks(linebreaks, p.Len())
857
858 result := append([]level(nil), p.resultLevels...)
859
860
861
862
863 for i, t := range p.initialTypes {
864 if t.in(B, S) {
865
866 result[i] = p.embeddingLevel
867
868
869 for j := i - 1; j >= 0; j-- {
870 if isWhitespace(p.initialTypes[j]) {
871 result[j] = p.embeddingLevel
872 } else {
873 break
874 }
875 }
876 }
877 }
878
879
880 start := 0
881 for _, limit := range linebreaks {
882 for j := limit - 1; j >= start; j-- {
883 if isWhitespace(p.initialTypes[j]) {
884 result[j] = p.embeddingLevel
885 } else {
886 break
887 }
888 }
889 start = limit
890 }
891
892 return result
893 }
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910 func (p *paragraph) getReordering(linebreaks []int) []int {
911 validateLineBreaks(linebreaks, p.Len())
912
913 return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
914 }
915
916
917
918 func computeMultilineReordering(levels []level, linebreaks []int) []int {
919 result := make([]int, len(levels))
920
921 start := 0
922 for _, limit := range linebreaks {
923 tempLevels := make([]level, limit-start)
924 copy(tempLevels, levels[start:])
925
926 for j, order := range computeReordering(tempLevels) {
927 result[start+j] = order + start
928 }
929 start = limit
930 }
931 return result
932 }
933
934
935
936
937 func computeReordering(levels []level) []int {
938 result := make([]int, len(levels))
939
940 for i := range result {
941 result[i] = i
942 }
943
944
945
946
947 highestLevel := level(0)
948 lowestOddLevel := level(maxDepth + 2)
949 for _, level := range levels {
950 if level > highestLevel {
951 highestLevel = level
952 }
953 if level&1 != 0 && level < lowestOddLevel {
954 lowestOddLevel = level
955 }
956 }
957
958 for level := highestLevel; level >= lowestOddLevel; level-- {
959 for i := 0; i < len(levels); i++ {
960 if levels[i] >= level {
961
962 start := i
963 limit := i + 1
964 for limit < len(levels) && levels[limit] >= level {
965 limit++
966 }
967
968 for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
969 result[j], result[k] = result[k], result[j]
970 }
971
972 i = limit
973 }
974 }
975 }
976
977 return result
978 }
979
980
981
982 func isWhitespace(c Class) bool {
983 switch c {
984 case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
985 return true
986 }
987 return false
988 }
989
990
991 func isRemovedByX9(c Class) bool {
992 switch c {
993 case LRE, RLE, LRO, RLO, PDF, BN:
994 return true
995 }
996 return false
997 }
998
999
1000 func typeForLevel(level level) Class {
1001 if (level & 0x1) == 0 {
1002 return L
1003 }
1004 return R
1005 }
1006
1007 func validateTypes(types []Class) error {
1008 if len(types) == 0 {
1009 return fmt.Errorf("types is null")
1010 }
1011 for i, t := range types[:len(types)-1] {
1012 if t == B {
1013 return fmt.Errorf("B type before end of paragraph at index: %d", i)
1014 }
1015 }
1016 return nil
1017 }
1018
1019 func validateParagraphEmbeddingLevel(embeddingLevel level) error {
1020 if embeddingLevel != implicitLevel &&
1021 embeddingLevel != 0 &&
1022 embeddingLevel != 1 {
1023 return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel)
1024 }
1025 return nil
1026 }
1027
1028 func validateLineBreaks(linebreaks []int, textLength int) error {
1029 prev := 0
1030 for i, next := range linebreaks {
1031 if next <= prev {
1032 return fmt.Errorf("bad linebreak: %d at index: %d", next, i)
1033 }
1034 prev = next
1035 }
1036 if prev != textLength {
1037 return fmt.Errorf("last linebreak was %d, want %d", prev, textLength)
1038 }
1039 return nil
1040 }
1041
1042 func validatePbTypes(pairTypes []bracketType) error {
1043 if len(pairTypes) == 0 {
1044 return fmt.Errorf("pairTypes is null")
1045 }
1046 for i, pt := range pairTypes {
1047 switch pt {
1048 case bpNone, bpOpen, bpClose:
1049 default:
1050 return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i])
1051 }
1052 }
1053 return nil
1054 }
1055
1056 func validatePbValues(pairValues []rune, pairTypes []bracketType) error {
1057 if pairValues == nil {
1058 return fmt.Errorf("pairValues is null")
1059 }
1060 if len(pairTypes) != len(pairValues) {
1061 return fmt.Errorf("pairTypes is different length from pairValues")
1062 }
1063 return nil
1064 }
1065
View as plain text