core.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bidi
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  )
    11  
    12  // This implementation is a port based on the reference implementation found at:
    13  // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
    14  //
    15  // described in Unicode Bidirectional Algorithm (UAX #9).
    16  //
    17  // Input:
    18  // There are two levels of input to the algorithm, since clients may prefer to
    19  // supply some information from out-of-band sources rather than relying on the
    20  // default behavior.
    21  //
    22  // - Bidi class array
    23  // - Bidi class array, with externally supplied base line direction
    24  //
    25  // Output:
    26  // Output is separated into several stages:
    27  //
    28  //  - levels array over entire paragraph
    29  //  - reordering array over entire paragraph
    30  //  - levels array over line
    31  //  - reordering array over line
    32  //
    33  // Note that for conformance to the Unicode Bidirectional Algorithm,
    34  // implementations are only required to generate correct reordering and
    35  // character directionality (odd or even levels) over a line. Generating
    36  // identical level arrays over a line is not required. Bidi explicit format
    37  // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
    38  // positions as long as the rest of the input is properly reordered.
    39  //
    40  // As the algorithm is defined to operate on a single paragraph at a time, this
    41  // implementation is written to handle single paragraphs. Thus rule P1 is
    42  // presumed by this implementation-- the data provided to the implementation is
    43  // assumed to be a single paragraph, and either contains no 'B' codes, or a
    44  // single 'B' code at the end of the input. 'B' is allowed as input to
    45  // illustrate how the algorithm assigns it a level.
    46  //
    47  // Also note that rules L3 and L4 depend on the rendering engine that uses the
    48  // result of the bidi algorithm. This implementation assumes that the rendering
    49  // engine expects combining marks in visual order (e.g. to the left of their
    50  // base character in RTL runs) and that it adjusts the glyphs used to render
    51  // mirrored characters that are in RTL runs so that they render appropriately.
    52  
// level is the embedding level of a character. Even embedding levels indicate
// left-to-right order and odd levels indicate right-to-left order. The special
// level of -1 is reserved for undefined order.
type level int8

// implicitLevel is the sentinel value (-1) for "no level supplied". When a
// paragraph's embedding level equals implicitLevel, run derives the level from
// the text instead.
const implicitLevel level = -1
    59  
    60  // in returns if x is equal to any of the values in set.
    61  func (c Class) in(set ...Class) bool {
    62  	for _, s := range set {
    63  		if c == s {
    64  			return true
    65  		}
    66  	}
    67  	return false
    68  }
    69  
// A paragraph contains the state of a paragraph: the classes supplied by the
// caller, the bracket data used by rule N0, and the types, levels and isolate
// pairings computed while the algorithm runs.
type paragraph struct {
	// Bidi class of each character as supplied to newParagraph (stored as a
	// copy of the caller's slice).
	initialTypes []Class

	// Arrays of properties needed for paired bracket evaluation in N0
	pairTypes  []bracketType // paired Bracket types for paragraph
	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone

	// Paragraph embedding level. A value of implicitLevel is replaced in run
	// by a level determined from the text.
	embeddingLevel level // default: = implicitLevel;

	// Per-character results at the paragraph level. resultTypes starts as a
	// copy of the input classes and is rewritten as rules are applied;
	// resultLevels is allocated in run.
	resultTypes  []Class
	resultLevels []level

	// Index of matching PDI for isolate initiator characters. For other
	// characters, the value of matchingPDI will be set to -1. For isolate
	// initiators with no matching PDI, matchingPDI will be set to the length of
	// the input string.
	matchingPDI []int

	// Index of matching isolate initiator for PDI characters. For other
	// characters, and for PDIs with no matching isolate initiator, the value of
	// matchingIsolateInitiator will be set to -1.
	matchingIsolateInitiator []int
}
    95  
    96  // newParagraph initializes a paragraph. The user needs to supply a few arrays
    97  // corresponding to the preprocessed text input. The types correspond to the
    98  // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
    99  // each rune. pairValues provides a unique bracket class identifier for each
   100  // rune (suggested is the rune of the open bracket for opening and matching
   101  // close brackets, after normalization). The embedding levels are optional, but
   102  // may be supplied to encode embedding levels of styled text.
   103  func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) {
   104  	var err error
   105  	if err = validateTypes(types); err != nil {
   106  		return nil, err
   107  	}
   108  	if err = validatePbTypes(pairTypes); err != nil {
   109  		return nil, err
   110  	}
   111  	if err = validatePbValues(pairValues, pairTypes); err != nil {
   112  		return nil, err
   113  	}
   114  	if err = validateParagraphEmbeddingLevel(levels); err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	p := &paragraph{
   119  		initialTypes:   append([]Class(nil), types...),
   120  		embeddingLevel: levels,
   121  
   122  		pairTypes:  pairTypes,
   123  		pairValues: pairValues,
   124  
   125  		resultTypes: append([]Class(nil), types...),
   126  	}
   127  	p.run()
   128  	return p, nil
   129  }
   130  
   131  func (p *paragraph) Len() int { return len(p.initialTypes) }
   132  
// run executes the paragraph-level part of the algorithm on p, in order:
// isolate matching, paragraph level (P2, P3), explicit levels (X1-X8), then
// weak, bracket, neutral and implicit resolution for each isolating run
// sequence, and finally level assignment for the characters removed by X9.
//
// It does not include line-based processing (Rules L1, L2). These are applied
// later in the line-based phase of the algorithm.
func (p *paragraph) run() {
	p.determineMatchingIsolates()

	// 1) determining the paragraph level
	// Rule P1 is the requirement for entering this algorithm.
	// Rules P2, P3.
	// If no externally supplied paragraph embedding level, use default.
	if p.embeddingLevel == implicitLevel {
		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
	}

	// Initialize result levels to paragraph embedding level.
	p.resultLevels = make([]level, p.Len())
	setLevels(p.resultLevels, p.embeddingLevel)

	// 2) Explicit levels and directions
	// Rules X1-X8.
	p.determineExplicitEmbeddingLevels()

	// Rule X9.
	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
	// from the string explicitly. But they are not copied into isolating run
	// sequences when they are created, so they are removed for all
	// practical purposes.

	// Rule X10.
	// Run remainder of algorithm one isolating run sequence at a time
	for _, seq := range p.determineIsolatingRunSequences() {
		// 3) resolving weak types
		// Rules W1-W7.
		seq.resolveWeakTypes()

		// 4a) resolving paired brackets
		// Rule N0
		resolvePairedBrackets(seq)

		// 4b) resolving neutral types
		// Rules N1-N3.
		seq.resolveNeutralTypes()

		// 5) resolving implicit embedding levels
		// Rules I1, I2.
		seq.resolveImplicitLevels()

		// Apply the computed levels and types
		seq.applyLevelsAndTypes()
	}

	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
	// BNs. This is for convenience, so the resulting level array will have
	// a value for every character.
	p.assignLevelsToCharactersRemovedByX9()
}
   188  
   189  // determineMatchingIsolates determines the matching PDI for each isolate
   190  // initiator and vice versa.
   191  //
   192  // Definition BD9.
   193  //
   194  // At the end of this function:
   195  //
   196  //   - The member variable matchingPDI is set to point to the index of the
   197  //     matching PDI character for each isolate initiator character. If there is
   198  //     no matching PDI, it is set to the length of the input text. For other
   199  //     characters, it is set to -1.
   200  //   - The member variable matchingIsolateInitiator is set to point to the
   201  //     index of the matching isolate initiator character for each PDI character.
   202  //     If there is no matching isolate initiator, or the character is not a PDI,
   203  //     it is set to -1.
   204  func (p *paragraph) determineMatchingIsolates() {
   205  	p.matchingPDI = make([]int, p.Len())
   206  	p.matchingIsolateInitiator = make([]int, p.Len())
   207  
   208  	for i := range p.matchingIsolateInitiator {
   209  		p.matchingIsolateInitiator[i] = -1
   210  	}
   211  
   212  	for i := range p.matchingPDI {
   213  		p.matchingPDI[i] = -1
   214  
   215  		if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
   216  			depthCounter := 1
   217  			for j := i + 1; j < p.Len(); j++ {
   218  				if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
   219  					depthCounter++
   220  				} else if u == PDI {
   221  					if depthCounter--; depthCounter == 0 {
   222  						p.matchingPDI[i] = j
   223  						p.matchingIsolateInitiator[j] = i
   224  						break
   225  					}
   226  				}
   227  			}
   228  			if p.matchingPDI[i] == -1 {
   229  				p.matchingPDI[i] = p.Len()
   230  			}
   231  		}
   232  	}
   233  }
   234  
   235  // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
   236  // the substring limited by the given range [start, end).
   237  //
   238  // Determines the paragraph level based on rules P2, P3. This is also used
   239  // in rule X5c to find if an FSI should resolve to LRI or RLI.
   240  func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
   241  	var strongType Class = unknownClass
   242  
   243  	// Rule P2.
   244  	for i := start; i < end; i++ {
   245  		if t := p.resultTypes[i]; t.in(L, AL, R) {
   246  			strongType = t
   247  			break
   248  		} else if t.in(FSI, LRI, RLI) {
   249  			i = p.matchingPDI[i] // skip over to the matching PDI
   250  			if i > end {
   251  				log.Panic("assert (i <= end)")
   252  			}
   253  		}
   254  	}
   255  	// Rule P3.
   256  	switch strongType {
   257  	case unknownClass: // none found
   258  		// default embedding level when no strong types found is 0.
   259  		return 0
   260  	case L:
   261  		return 0
   262  	default: // AL, R
   263  		return 1
   264  	}
   265  }
   266  
// maxDepth is the highest embedding level accepted when processing explicit
// formatting characters; determineExplicitEmbeddingLevels rejects a new level
// greater than maxDepth. It also sizes the directional status stack
// (maxDepth + 1 entries).
const maxDepth = 125

// This stack will store the embedding levels and override and isolated
// statuses. The three arrays are parallel: entry k of each describes the same
// stack element, and stackCounter is the number of elements currently in use.
type directionalStatusStack struct {
	stackCounter        int
	embeddingLevelStack [maxDepth + 1]level
	overrideStatusStack [maxDepth + 1]Class
	isolateStatusStack  [maxDepth + 1]bool
}
   277  
   278  func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
   279  func (s *directionalStatusStack) pop()       { s.stackCounter-- }
   280  func (s *directionalStatusStack) depth() int { return s.stackCounter }
   281  
   282  func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
   283  	s.embeddingLevelStack[s.stackCounter] = level
   284  	s.overrideStatusStack[s.stackCounter] = overrideStatus
   285  	s.isolateStatusStack[s.stackCounter] = isolateStatus
   286  	s.stackCounter++
   287  }
   288  
   289  func (s *directionalStatusStack) lastEmbeddingLevel() level {
   290  	return s.embeddingLevelStack[s.stackCounter-1]
   291  }
   292  
   293  func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
   294  	return s.overrideStatusStack[s.stackCounter-1]
   295  }
   296  
   297  func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
   298  	return s.isolateStatusStack[s.stackCounter-1]
   299  }
   300  
// determineExplicitEmbeddingLevels determines explicit levels using rules
// X1 - X8.
//
// It walks p.resultTypes once while maintaining a directional status stack
// and three counters (overflow isolates, overflow embeddings, valid
// isolates). For each character it writes the level in force to
// p.resultLevels and, when a directional override is active, replaces the
// character's entry in p.resultTypes with the override class.
func (p *paragraph) determineExplicitEmbeddingLevels() {
	var stack directionalStatusStack
	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int

	// Rule X1.
	// The bottom entry carries the paragraph level with no override (ON) and
	// no isolate status.
	stack.push(p.embeddingLevel, ON, false)

	for i, t := range p.resultTypes {
		// Rules X2, X3, X4, X5, X5a, X5b, X5c
		switch t {
		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
			isIsolate := t.in(RLI, LRI, FSI)
			isRTL := t.in(RLE, RLO, RLI)

			// override if this is an FSI that resolves to RLI
			if t == FSI {
				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
			}
			// An isolate initiator itself takes the level and override of
			// the enclosing context, not of the isolate it opens.
			if isIsolate {
				p.resultLevels[i] = stack.lastEmbeddingLevel()
				if stack.lastDirectionalOverrideStatus() != ON {
					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				}
			}

			var newLevel level
			if isRTL {
				// least greater odd
				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
			} else {
				// least greater even
				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
			}

			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
				if isIsolate {
					validIsolateCount++
				}
				// Push new embedding level, override status, and isolated
				// status.
				// No check for valid stack counter, since the level check
				// suffices.
				switch t {
				case LRO:
					stack.push(newLevel, L, isIsolate)
				case RLO:
					stack.push(newLevel, R, isIsolate)
				default:
					stack.push(newLevel, ON, isIsolate)
				}
				// Not really part of the spec
				if !isIsolate {
					p.resultLevels[i] = newLevel
				}
			} else {
				// This is an invalid explicit formatting character,
				// so apply the "Otherwise" part of rules X2-X5b.
				if isIsolate {
					overflowIsolateCount++
				} else { // !isIsolate
					if overflowIsolateCount == 0 {
						overflowEmbeddingCount++
					}
				}
			}

		// Rule X6a
		case PDI:
			if overflowIsolateCount > 0 {
				overflowIsolateCount--
			} else if validIsolateCount == 0 {
				// do nothing
			} else {
				overflowEmbeddingCount = 0
				// Discard embeddings and overrides opened inside the
				// isolate, then the isolate entry itself.
				for !stack.lastDirectionalIsolateStatus() {
					stack.pop()
				}
				stack.pop()
				validIsolateCount--
			}
			p.resultLevels[i] = stack.lastEmbeddingLevel()

		// Rule X7
		case PDF:
			// Not really part of the spec
			p.resultLevels[i] = stack.lastEmbeddingLevel()

			if overflowIsolateCount > 0 {
				// do nothing
			} else if overflowEmbeddingCount > 0 {
				overflowEmbeddingCount--
			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
				// Never pop the bottom (paragraph) entry or an isolate entry.
				stack.pop()
			}

		case B: // paragraph separator.
			// Rule X8.

			// These values are reset for clarity, in this implementation B
			// can only occur as the last code in the array.
			stack.empty()
			overflowIsolateCount = 0
			overflowEmbeddingCount = 0
			validIsolateCount = 0
			p.resultLevels[i] = p.embeddingLevel

		default:
			// Every other character takes the current level and, if an
			// override is active, the override class.
			p.resultLevels[i] = stack.lastEmbeddingLevel()
			if stack.lastDirectionalOverrideStatus() != ON {
				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
			}
		}
	}
}
   416  
// isolatingRunSequence holds the working state for one isolating run
// sequence (Definition BD13) while the weak, neutral and implicit rules are
// applied to it.
type isolatingRunSequence struct {
	p *paragraph // paragraph the sequence belongs to

	indexes []int // indexes to the original string

	types          []Class // type of each character using the index
	resolvedLevels []level // resolved levels after application of rules
	level          level   // level of the first character of the sequence
	sos, eos       Class   // start-of-sequence and end-of-sequence types
}
   427  
   428  func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
   429  
   430  // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
   431  // either L or R, for each isolating run sequence.
   432  func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
   433  	length := len(indexes)
   434  	types := make([]Class, length)
   435  	for i, x := range indexes {
   436  		types[i] = p.resultTypes[x]
   437  	}
   438  
   439  	// assign level, sos and eos
   440  	prevChar := indexes[0] - 1
   441  	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
   442  		prevChar--
   443  	}
   444  	prevLevel := p.embeddingLevel
   445  	if prevChar >= 0 {
   446  		prevLevel = p.resultLevels[prevChar]
   447  	}
   448  
   449  	var succLevel level
   450  	lastType := types[length-1]
   451  	if lastType.in(LRI, RLI, FSI) {
   452  		succLevel = p.embeddingLevel
   453  	} else {
   454  		// the first character after the end of run sequence
   455  		limit := indexes[length-1] + 1
   456  		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
   457  
   458  		}
   459  		succLevel = p.embeddingLevel
   460  		if limit < p.Len() {
   461  			succLevel = p.resultLevels[limit]
   462  		}
   463  	}
   464  	level := p.resultLevels[indexes[0]]
   465  	return &isolatingRunSequence{
   466  		p:       p,
   467  		indexes: indexes,
   468  		types:   types,
   469  		level:   level,
   470  		sos:     typeForLevel(max(prevLevel, level)),
   471  		eos:     typeForLevel(max(succLevel, level)),
   472  	}
   473  }
   474  
   475  // Resolving weak types Rules W1-W7.
   476  //
   477  // Note that some weak types (EN, AN) remain after this processing is
   478  // complete.
   479  func (s *isolatingRunSequence) resolveWeakTypes() {
   480  
   481  	// on entry, only these types remain
   482  	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
   483  
   484  	// Rule W1.
   485  	// Changes all NSMs.
   486  	precedingCharacterType := s.sos
   487  	for i, t := range s.types {
   488  		if t == NSM {
   489  			s.types[i] = precedingCharacterType
   490  		} else {
   491  			// if t.in(LRI, RLI, FSI, PDI) {
   492  			// 	precedingCharacterType = ON
   493  			// }
   494  			precedingCharacterType = t
   495  		}
   496  	}
   497  
   498  	// Rule W2.
   499  	// EN does not change at the start of the run, because sos != AL.
   500  	for i, t := range s.types {
   501  		if t == EN {
   502  			for j := i - 1; j >= 0; j-- {
   503  				if t := s.types[j]; t.in(L, R, AL) {
   504  					if t == AL {
   505  						s.types[i] = AN
   506  					}
   507  					break
   508  				}
   509  			}
   510  		}
   511  	}
   512  
   513  	// Rule W3.
   514  	for i, t := range s.types {
   515  		if t == AL {
   516  			s.types[i] = R
   517  		}
   518  	}
   519  
   520  	// Rule W4.
   521  	// Since there must be values on both sides for this rule to have an
   522  	// effect, the scan skips the first and last value.
   523  	//
   524  	// Although the scan proceeds left to right, and changes the type
   525  	// values in a way that would appear to affect the computations
   526  	// later in the scan, there is actually no problem. A change in the
   527  	// current value can only affect the value to its immediate right,
   528  	// and only affect it if it is ES or CS. But the current value can
   529  	// only change if the value to its right is not ES or CS. Thus
   530  	// either the current value will not change, or its change will have
   531  	// no effect on the remainder of the analysis.
   532  
   533  	for i := 1; i < s.Len()-1; i++ {
   534  		t := s.types[i]
   535  		if t == ES || t == CS {
   536  			prevSepType := s.types[i-1]
   537  			succSepType := s.types[i+1]
   538  			if prevSepType == EN && succSepType == EN {
   539  				s.types[i] = EN
   540  			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
   541  				s.types[i] = AN
   542  			}
   543  		}
   544  	}
   545  
   546  	// Rule W5.
   547  	for i, t := range s.types {
   548  		if t == ET {
   549  			// locate end of sequence
   550  			runStart := i
   551  			runEnd := s.findRunLimit(runStart, ET)
   552  
   553  			// check values at ends of sequence
   554  			t := s.sos
   555  			if runStart > 0 {
   556  				t = s.types[runStart-1]
   557  			}
   558  			if t != EN {
   559  				t = s.eos
   560  				if runEnd < len(s.types) {
   561  					t = s.types[runEnd]
   562  				}
   563  			}
   564  			if t == EN {
   565  				setTypes(s.types[runStart:runEnd], EN)
   566  			}
   567  			// continue at end of sequence
   568  			i = runEnd
   569  		}
   570  	}
   571  
   572  	// Rule W6.
   573  	for i, t := range s.types {
   574  		if t.in(ES, ET, CS) {
   575  			s.types[i] = ON
   576  		}
   577  	}
   578  
   579  	// Rule W7.
   580  	for i, t := range s.types {
   581  		if t == EN {
   582  			// set default if we reach start of run
   583  			prevStrongType := s.sos
   584  			for j := i - 1; j >= 0; j-- {
   585  				t = s.types[j]
   586  				if t == L || t == R { // AL's have been changed to R
   587  					prevStrongType = t
   588  					break
   589  				}
   590  			}
   591  			if prevStrongType == L {
   592  				s.types[i] = L
   593  			}
   594  		}
   595  	}
   596  }
   597  
   598  // 6) resolving neutral types Rules N1-N2.
   599  func (s *isolatingRunSequence) resolveNeutralTypes() {
   600  
   601  	// on entry, only these types can be in resultTypes
   602  	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
   603  
   604  	for i, t := range s.types {
   605  		switch t {
   606  		case WS, ON, B, S, RLI, LRI, FSI, PDI:
   607  			// find bounds of run of neutrals
   608  			runStart := i
   609  			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
   610  
   611  			// determine effective types at ends of run
   612  			var leadType, trailType Class
   613  
   614  			// Note that the character found can only be L, R, AN, or
   615  			// EN.
   616  			if runStart == 0 {
   617  				leadType = s.sos
   618  			} else {
   619  				leadType = s.types[runStart-1]
   620  				if leadType.in(AN, EN) {
   621  					leadType = R
   622  				}
   623  			}
   624  			if runEnd == len(s.types) {
   625  				trailType = s.eos
   626  			} else {
   627  				trailType = s.types[runEnd]
   628  				if trailType.in(AN, EN) {
   629  					trailType = R
   630  				}
   631  			}
   632  
   633  			var resolvedType Class
   634  			if leadType == trailType {
   635  				// Rule N1.
   636  				resolvedType = leadType
   637  			} else {
   638  				// Rule N2.
   639  				// Notice the embedding level of the run is used, not
   640  				// the paragraph embedding level.
   641  				resolvedType = typeForLevel(s.level)
   642  			}
   643  
   644  			setTypes(s.types[runStart:runEnd], resolvedType)
   645  
   646  			// skip over run of (former) neutrals
   647  			i = runEnd
   648  		}
   649  	}
   650  }
   651  
   652  func setLevels(levels []level, newLevel level) {
   653  	for i := range levels {
   654  		levels[i] = newLevel
   655  	}
   656  }
   657  
   658  func setTypes(types []Class, newType Class) {
   659  	for i := range types {
   660  		types[i] = newType
   661  	}
   662  }
   663  
   664  // 7) resolving implicit embedding levels Rules I1, I2.
   665  func (s *isolatingRunSequence) resolveImplicitLevels() {
   666  
   667  	// on entry, only these types can be in resultTypes
   668  	s.assertOnly(L, R, EN, AN)
   669  
   670  	s.resolvedLevels = make([]level, len(s.types))
   671  	setLevels(s.resolvedLevels, s.level)
   672  
   673  	if (s.level & 1) == 0 { // even level
   674  		for i, t := range s.types {
   675  			// Rule I1.
   676  			if t == L {
   677  				// no change
   678  			} else if t == R {
   679  				s.resolvedLevels[i] += 1
   680  			} else { // t == AN || t == EN
   681  				s.resolvedLevels[i] += 2
   682  			}
   683  		}
   684  	} else { // odd level
   685  		for i, t := range s.types {
   686  			// Rule I2.
   687  			if t == R {
   688  				// no change
   689  			} else { // t == L || t == AN || t == EN
   690  				s.resolvedLevels[i] += 1
   691  			}
   692  		}
   693  	}
   694  }
   695  
   696  // Applies the levels and types resolved in rules W1-I2 to the
   697  // resultLevels array.
   698  func (s *isolatingRunSequence) applyLevelsAndTypes() {
   699  	for i, x := range s.indexes {
   700  		s.p.resultTypes[x] = s.types[i]
   701  		s.p.resultLevels[x] = s.resolvedLevels[i]
   702  	}
   703  }
   704  
   705  // Return the limit of the run consisting only of the types in validSet
   706  // starting at index. This checks the value at index, and will return
   707  // index if that value is not in validSet.
   708  func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
   709  loop:
   710  	for ; index < len(s.types); index++ {
   711  		t := s.types[index]
   712  		for _, valid := range validSet {
   713  			if t == valid {
   714  				continue loop
   715  			}
   716  		}
   717  		return index // didn't find a match in validSet
   718  	}
   719  	return len(s.types)
   720  }
   721  
   722  // Algorithm validation. Assert that all values in types are in the
   723  // provided set.
   724  func (s *isolatingRunSequence) assertOnly(codes ...Class) {
   725  loop:
   726  	for i, t := range s.types {
   727  		for _, c := range codes {
   728  			if t == c {
   729  				continue loop
   730  			}
   731  		}
   732  		log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
   733  	}
   734  }
   735  
   736  // determineLevelRuns returns an array of level runs. Each level run is
   737  // described as an array of indexes into the input string.
   738  //
   739  // Determines the level runs. Rule X9 will be applied in determining the
   740  // runs, in the way that makes sure the characters that are supposed to be
   741  // removed are not included in the runs.
   742  func (p *paragraph) determineLevelRuns() [][]int {
   743  	run := []int{}
   744  	allRuns := [][]int{}
   745  	currentLevel := implicitLevel
   746  
   747  	for i := range p.initialTypes {
   748  		if !isRemovedByX9(p.initialTypes[i]) {
   749  			if p.resultLevels[i] != currentLevel {
   750  				// we just encountered a new run; wrap up last run
   751  				if currentLevel >= 0 { // only wrap it up if there was a run
   752  					allRuns = append(allRuns, run)
   753  					run = nil
   754  				}
   755  				// Start new run
   756  				currentLevel = p.resultLevels[i]
   757  			}
   758  			run = append(run, i)
   759  		}
   760  	}
   761  	// Wrap up the final run, if any
   762  	if len(run) > 0 {
   763  		allRuns = append(allRuns, run)
   764  	}
   765  	return allRuns
   766  }
   767  
   768  // Definition BD13. Determine isolating run sequences.
   769  func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
   770  	levelRuns := p.determineLevelRuns()
   771  
   772  	// Compute the run that each character belongs to
   773  	runForCharacter := make([]int, p.Len())
   774  	for i, run := range levelRuns {
   775  		for _, index := range run {
   776  			runForCharacter[index] = i
   777  		}
   778  	}
   779  
   780  	sequences := []*isolatingRunSequence{}
   781  
   782  	var currentRunSequence []int
   783  
   784  	for _, run := range levelRuns {
   785  		first := run[0]
   786  		if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
   787  			currentRunSequence = nil
   788  			// int run = i;
   789  			for {
   790  				// Copy this level run into currentRunSequence
   791  				currentRunSequence = append(currentRunSequence, run...)
   792  
   793  				last := currentRunSequence[len(currentRunSequence)-1]
   794  				lastT := p.initialTypes[last]
   795  				if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
   796  					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
   797  				} else {
   798  					break
   799  				}
   800  			}
   801  			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
   802  		}
   803  	}
   804  	return sequences
   805  }
   806  
   807  // Assign level information to characters removed by rule X9. This is for
   808  // ease of relating the level information to the original input data. Note
   809  // that the levels assigned to these codes are arbitrary, they're chosen so
   810  // as to avoid breaking level runs.
   811  func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
   812  	for i, t := range p.initialTypes {
   813  		if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
   814  			p.resultTypes[i] = t
   815  			p.resultLevels[i] = -1
   816  		}
   817  	}
   818  	// now propagate forward the levels information (could have
   819  	// propagated backward, the main thing is not to introduce a level
   820  	// break where one doesn't already exist).
   821  
   822  	if p.resultLevels[0] == -1 {
   823  		p.resultLevels[0] = p.embeddingLevel
   824  	}
   825  	for i := 1; i < len(p.initialTypes); i++ {
   826  		if p.resultLevels[i] == -1 {
   827  			p.resultLevels[i] = p.resultLevels[i-1]
   828  		}
   829  	}
   830  	// Embedding information is for informational purposes only so need not be
   831  	// adjusted.
   832  }
   833  
   834  //
   835  // Output
   836  //
   837  
   838  // getLevels computes levels array breaking lines at offsets in linebreaks.
   839  // Rule L1.
   840  //
   841  // The linebreaks array must include at least one value. The values must be
   842  // in strictly increasing order (no duplicates) between 1 and the length of
   843  // the text, inclusive. The last value must be the length of the text.
   844  func (p *paragraph) getLevels(linebreaks []int) []level {
   845  	// Note that since the previous processing has removed all
   846  	// P, S, and WS values from resultTypes, the values referred to
   847  	// in these rules are the initial types, before any processing
   848  	// has been applied (including processing of overrides).
   849  	//
   850  	// This example implementation has reinserted explicit format codes
   851  	// and BN, in order that the levels array correspond to the
   852  	// initial text. Their final placement is not normative.
   853  	// These codes are treated like WS in this implementation,
   854  	// so they don't interrupt sequences of WS.
   855  
   856  	validateLineBreaks(linebreaks, p.Len())
   857  
   858  	result := append([]level(nil), p.resultLevels...)
   859  
   860  	// don't worry about linebreaks since if there is a break within
   861  	// a series of WS values preceding S, the linebreak itself
   862  	// causes the reset.
   863  	for i, t := range p.initialTypes {
   864  		if t.in(B, S) {
   865  			// Rule L1, clauses one and two.
   866  			result[i] = p.embeddingLevel
   867  
   868  			// Rule L1, clause three.
   869  			for j := i - 1; j >= 0; j-- {
   870  				if isWhitespace(p.initialTypes[j]) { // including format codes
   871  					result[j] = p.embeddingLevel
   872  				} else {
   873  					break
   874  				}
   875  			}
   876  		}
   877  	}
   878  
   879  	// Rule L1, clause four.
   880  	start := 0
   881  	for _, limit := range linebreaks {
   882  		for j := limit - 1; j >= start; j-- {
   883  			if isWhitespace(p.initialTypes[j]) { // including format codes
   884  				result[j] = p.embeddingLevel
   885  			} else {
   886  				break
   887  			}
   888  		}
   889  		start = limit
   890  	}
   891  
   892  	return result
   893  }
   894  
   895  // getReordering returns the reordering of lines from a visual index to a
   896  // logical index for line breaks at the given offsets.
   897  //
   898  // Lines are concatenated from left to right. So for example, the fifth
   899  // character from the left on the third line is
   900  //
   901  //	getReordering(linebreaks)[linebreaks[1] + 4]
   902  //
   903  // (linebreaks[1] is the position after the last character of the second
   904  // line, which is also the index of the first character on the third line,
   905  // and adding four gets the fifth character from the left).
   906  //
   907  // The linebreaks array must include at least one value. The values must be
   908  // in strictly increasing order (no duplicates) between 1 and the length of
   909  // the text, inclusive. The last value must be the length of the text.
   910  func (p *paragraph) getReordering(linebreaks []int) []int {
   911  	validateLineBreaks(linebreaks, p.Len())
   912  
   913  	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
   914  }
   915  
   916  // Return multiline reordering array for a given level array. Reordering
   917  // does not occur across a line break.
   918  func computeMultilineReordering(levels []level, linebreaks []int) []int {
   919  	result := make([]int, len(levels))
   920  
   921  	start := 0
   922  	for _, limit := range linebreaks {
   923  		tempLevels := make([]level, limit-start)
   924  		copy(tempLevels, levels[start:])
   925  
   926  		for j, order := range computeReordering(tempLevels) {
   927  			result[start+j] = order + start
   928  		}
   929  		start = limit
   930  	}
   931  	return result
   932  }
   933  
   934  // Return reordering array for a given level array. This reorders a single
   935  // line. The reordering is a visual to logical map. For example, the
   936  // leftmost char is string.charAt(order[0]). Rule L2.
   937  func computeReordering(levels []level) []int {
   938  	result := make([]int, len(levels))
   939  	// initialize order
   940  	for i := range result {
   941  		result[i] = i
   942  	}
   943  
   944  	// locate highest level found on line.
   945  	// Note the rules say text, but no reordering across line bounds is
   946  	// performed, so this is sufficient.
   947  	highestLevel := level(0)
   948  	lowestOddLevel := level(maxDepth + 2)
   949  	for _, level := range levels {
   950  		if level > highestLevel {
   951  			highestLevel = level
   952  		}
   953  		if level&1 != 0 && level < lowestOddLevel {
   954  			lowestOddLevel = level
   955  		}
   956  	}
   957  
   958  	for level := highestLevel; level >= lowestOddLevel; level-- {
   959  		for i := 0; i < len(levels); i++ {
   960  			if levels[i] >= level {
   961  				// find range of text at or above this level
   962  				start := i
   963  				limit := i + 1
   964  				for limit < len(levels) && levels[limit] >= level {
   965  					limit++
   966  				}
   967  
   968  				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
   969  					result[j], result[k] = result[k], result[j]
   970  				}
   971  				// skip to end of level run
   972  				i = limit
   973  			}
   974  		}
   975  	}
   976  
   977  	return result
   978  }
   979  
   980  // isWhitespace reports whether the type is considered a whitespace type for the
   981  // line break rules.
   982  func isWhitespace(c Class) bool {
   983  	switch c {
   984  	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
   985  		return true
   986  	}
   987  	return false
   988  }
   989  
   990  // isRemovedByX9 reports whether the type is one of the types removed in X9.
   991  func isRemovedByX9(c Class) bool {
   992  	switch c {
   993  	case LRE, RLE, LRO, RLO, PDF, BN:
   994  		return true
   995  	}
   996  	return false
   997  }
   998  
   999  // typeForLevel reports the strong type (L or R) corresponding to the level.
  1000  func typeForLevel(level level) Class {
  1001  	if (level & 0x1) == 0 {
  1002  		return L
  1003  	}
  1004  	return R
  1005  }
  1006  
  1007  func validateTypes(types []Class) error {
  1008  	if len(types) == 0 {
  1009  		return fmt.Errorf("types is null")
  1010  	}
  1011  	for i, t := range types[:len(types)-1] {
  1012  		if t == B {
  1013  			return fmt.Errorf("B type before end of paragraph at index: %d", i)
  1014  		}
  1015  	}
  1016  	return nil
  1017  }
  1018  
  1019  func validateParagraphEmbeddingLevel(embeddingLevel level) error {
  1020  	if embeddingLevel != implicitLevel &&
  1021  		embeddingLevel != 0 &&
  1022  		embeddingLevel != 1 {
  1023  		return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel)
  1024  	}
  1025  	return nil
  1026  }
  1027  
  1028  func validateLineBreaks(linebreaks []int, textLength int) error {
  1029  	prev := 0
  1030  	for i, next := range linebreaks {
  1031  		if next <= prev {
  1032  			return fmt.Errorf("bad linebreak: %d at index: %d", next, i)
  1033  		}
  1034  		prev = next
  1035  	}
  1036  	if prev != textLength {
  1037  		return fmt.Errorf("last linebreak was %d, want %d", prev, textLength)
  1038  	}
  1039  	return nil
  1040  }
  1041  
  1042  func validatePbTypes(pairTypes []bracketType) error {
  1043  	if len(pairTypes) == 0 {
  1044  		return fmt.Errorf("pairTypes is null")
  1045  	}
  1046  	for i, pt := range pairTypes {
  1047  		switch pt {
  1048  		case bpNone, bpOpen, bpClose:
  1049  		default:
  1050  			return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i])
  1051  		}
  1052  	}
  1053  	return nil
  1054  }
  1055  
  1056  func validatePbValues(pairValues []rune, pairTypes []bracketType) error {
  1057  	if pairValues == nil {
  1058  		return fmt.Errorf("pairValues is null")
  1059  	}
  1060  	if len(pairTypes) != len(pairValues) {
  1061  		return fmt.Errorf("pairTypes is different length from pairValues")
  1062  	}
  1063  	return nil
  1064  }
  1065
View as plain text