add vendor

vendor/golang.org/x/text/AUTHORS (generated, vendored, new file, 3 lines)
							| @@ -0,0 +1,3 @@ | ||||
| # This source code refers to The Go Authors for copyright purposes. | ||||
| # The master list of authors is in the main Go distribution, | ||||
| # visible at http://tip.golang.org/AUTHORS. | ||||

vendor/golang.org/x/text/CONTRIBUTORS (generated, vendored, new file, 3 lines)
							| @@ -0,0 +1,3 @@ | ||||
| # This source code was written by the Go contributors. | ||||
| # The master list of contributors is in the main Go distribution, | ||||
| # visible at http://tip.golang.org/CONTRIBUTORS. | ||||

vendor/golang.org/x/text/LICENSE (generated, vendored, new file, 27 lines)
							| @@ -0,0 +1,27 @@ | ||||
| Copyright (c) 2009 The Go Authors. All rights reserved. | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | ||||
| met: | ||||
|  | ||||
|    * Redistributions of source code must retain the above copyright | ||||
| notice, this list of conditions and the following disclaimer. | ||||
|    * Redistributions in binary form must reproduce the above | ||||
| copyright notice, this list of conditions and the following disclaimer | ||||
| in the documentation and/or other materials provided with the | ||||
| distribution. | ||||
|    * Neither the name of Google Inc. nor the names of its | ||||
| contributors may be used to endorse or promote products derived from | ||||
| this software without specific prior written permission. | ||||
|  | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||

vendor/golang.org/x/text/PATENTS (generated, vendored, new file, 22 lines)
							| @@ -0,0 +1,22 @@ | ||||
| Additional IP Rights Grant (Patents) | ||||
|  | ||||
| "This implementation" means the copyrightable works distributed by | ||||
| Google as part of the Go project. | ||||
|  | ||||
| Google hereby grants to You a perpetual, worldwide, non-exclusive, | ||||
| no-charge, royalty-free, irrevocable (except as stated in this section) | ||||
| patent license to make, have made, use, offer to sell, sell, import, | ||||
| transfer and otherwise run, modify and propagate the contents of this | ||||
| implementation of Go, where such license applies only to those patent | ||||
| claims, both currently owned or controlled by Google and acquired in | ||||
| the future, licensable by Google that are necessarily infringed by this | ||||
| implementation of Go.  This grant does not include claims that would be | ||||
| infringed only as a consequence of further modification of this | ||||
| implementation.  If you or your agent or exclusive licensee institute or | ||||
| order or agree to the institution of patent litigation against any | ||||
| entity (including a cross-claim or counterclaim in a lawsuit) alleging | ||||
| that this implementation of Go or any code incorporated within this | ||||
| implementation of Go constitutes direct or contributory patent | ||||
| infringement, or inducement of patent infringement, then any patent | ||||
| rights granted to you under this License for this implementation of Go | ||||
| shall terminate as of the date such litigation is filed. | ||||

vendor/golang.org/x/text/secure/bidirule/bidirule.go (generated, vendored, new file, 336 lines)
							| @@ -0,0 +1,336 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| // Package bidirule implements the Bidi Rule defined by RFC 5893. | ||||
| // | ||||
| // This package is under development. The API may change without notice and | ||||
| // without preserving backward compatibility. | ||||
| package bidirule | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"golang.org/x/text/transform" | ||||
| 	"golang.org/x/text/unicode/bidi" | ||||
| ) | ||||
|  | ||||
| // This file contains an implementation of RFC 5893: Right-to-Left Scripts for | ||||
| // Internationalized Domain Names for Applications (IDNA) | ||||
| // | ||||
| // A label is an individual component of a domain name.  Labels are usually | ||||
| // shown separated by dots; for example, the domain name "www.example.com" is | ||||
| // composed of three labels: "www", "example", and "com". | ||||
| // | ||||
| // An RTL label is a label that contains at least one character of class R, AL, | ||||
| // or AN. An LTR label is any label that is not an RTL label. | ||||
| // | ||||
| // A "Bidi domain name" is a domain name that contains at least one RTL label. | ||||
| // | ||||
| //  The following guarantees can be made based on the above: | ||||
| // | ||||
| //  o  In a domain name consisting of only labels that satisfy the rule, | ||||
| //     the requirements of Section 3 are satisfied.  Note that even LTR | ||||
| //     labels and pure ASCII labels have to be tested. | ||||
| // | ||||
| //  o  In a domain name consisting of only LDH labels (as defined in the | ||||
| //     Definitions document [RFC5890]) and labels that satisfy the rule, | ||||
| //     the requirements of Section 3 are satisfied as long as a label | ||||
| //     that starts with an ASCII digit does not come after a | ||||
| //     right-to-left label. | ||||
| // | ||||
| //  No guarantee is given for other combinations. | ||||
|  | ||||
| // ErrInvalid indicates a label is invalid according to the Bidi Rule. | ||||
| var ErrInvalid = errors.New("bidirule: failed Bidi Rule") | ||||
|  | ||||
| type ruleState uint8 | ||||
|  | ||||
| const ( | ||||
| 	ruleInitial ruleState = iota | ||||
| 	ruleLTR | ||||
| 	ruleLTRFinal | ||||
| 	ruleRTL | ||||
| 	ruleRTLFinal | ||||
| 	ruleInvalid | ||||
| ) | ||||
|  | ||||
| type ruleTransition struct { | ||||
| 	next ruleState | ||||
| 	mask uint16 | ||||
| } | ||||
|  | ||||
| var transitions = [...][2]ruleTransition{ | ||||
| 	// [2.1] The first character must be a character with Bidi property L, R, or | ||||
| 	// AL. If it has the R or AL property, it is an RTL label; if it has the L | ||||
| 	// property, it is an LTR label. | ||||
| 	ruleInitial: { | ||||
| 		{ruleLTRFinal, 1 << bidi.L}, | ||||
| 		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL}, | ||||
| 	}, | ||||
| 	ruleRTL: { | ||||
| 		// [2.3] In an RTL label, the end of the label must be a character with | ||||
| 		// Bidi property R, AL, EN, or AN, followed by zero or more characters | ||||
| 		// with Bidi property NSM. | ||||
| 		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN}, | ||||
|  | ||||
| 		// [2.2] In an RTL label, only characters with the Bidi properties R, | ||||
| 		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||||
| 		// We exclude the entries from [2.3] | ||||
| 		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, | ||||
| 	}, | ||||
| 	ruleRTLFinal: { | ||||
| 		// [2.3] In an RTL label, the end of the label must be a character with | ||||
| 		// Bidi property R, AL, EN, or AN, followed by zero or more characters | ||||
| 		// with Bidi property NSM. | ||||
| 		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM}, | ||||
|  | ||||
| 		// [2.2] In an RTL label, only characters with the Bidi properties R, | ||||
| 		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||||
| 		// We exclude the entries from [2.3] and NSM. | ||||
| 		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, | ||||
| 	}, | ||||
| 	ruleLTR: { | ||||
| 		// [2.6] In an LTR label, the end of the label must be a character with | ||||
| 		// Bidi property L or EN, followed by zero or more characters with Bidi | ||||
| 		// property NSM. | ||||
| 		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN}, | ||||
|  | ||||
| 		// [2.5] In an LTR label, only characters with the Bidi properties L, | ||||
| 		// EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||||
| 		// We exclude the entries from [2.6]. | ||||
| 		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, | ||||
| 	}, | ||||
| 	ruleLTRFinal: { | ||||
| 		// [2.6] In an LTR label, the end of the label must be a character with | ||||
| 		// Bidi property L or EN, followed by zero or more characters with Bidi | ||||
| 		// property NSM. | ||||
| 		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM}, | ||||
|  | ||||
| 		// [2.5] In an LTR label, only characters with the Bidi properties L, | ||||
| 		// EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||||
| 		// We exclude the entries from [2.6]. | ||||
| 		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, | ||||
| 	}, | ||||
| 	ruleInvalid: { | ||||
| 		{ruleInvalid, 0}, | ||||
| 		{ruleInvalid, 0}, | ||||
| 	}, | ||||
| } | ||||
|  | ||||
| // [2.4] In an RTL label, if an EN is present, no AN may be present, and | ||||
| // vice versa. | ||||
| const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN) | ||||
|  | ||||
| // From RFC 5893 | ||||
| // An RTL label is a label that contains at least one character of type | ||||
| // R, AL, or AN. | ||||
| // | ||||
| // An LTR label is any label that is not an RTL label. | ||||
|  | ||||
| // Direction reports the direction of the given label as defined by RFC 5893. | ||||
| // The Bidi Rule does not have to be applied to labels of the category | ||||
| // LeftToRight. | ||||
| func Direction(b []byte) bidi.Direction { | ||||
| 	for i := 0; i < len(b); { | ||||
| 		e, sz := bidi.Lookup(b[i:]) | ||||
| 		if sz == 0 { | ||||
| 			i++ | ||||
| 		} | ||||
| 		c := e.Class() | ||||
| 		if c == bidi.R || c == bidi.AL || c == bidi.AN { | ||||
| 			return bidi.RightToLeft | ||||
| 		} | ||||
| 		i += sz | ||||
| 	} | ||||
| 	return bidi.LeftToRight | ||||
| } | ||||
|  | ||||
| // DirectionString reports the direction of the given label as defined by RFC | ||||
| // 5893. The Bidi Rule does not have to be applied to labels of the category | ||||
| // LeftToRight. | ||||
| func DirectionString(s string) bidi.Direction { | ||||
| 	for i := 0; i < len(s); { | ||||
| 		e, sz := bidi.LookupString(s[i:]) | ||||
| 		if sz == 0 { | ||||
| 			i++ | ||||
| 			continue | ||||
| 		} | ||||
| 		c := e.Class() | ||||
| 		if c == bidi.R || c == bidi.AL || c == bidi.AN { | ||||
| 			return bidi.RightToLeft | ||||
| 		} | ||||
| 		i += sz | ||||
| 	} | ||||
| 	return bidi.LeftToRight | ||||
| } | ||||
|  | ||||
| // Valid reports whether b conforms to the BiDi rule. | ||||
| func Valid(b []byte) bool { | ||||
| 	var t Transformer | ||||
| 	if n, ok := t.advance(b); !ok || n < len(b) { | ||||
| 		return false | ||||
| 	} | ||||
| 	return t.isFinal() | ||||
| } | ||||
|  | ||||
| // ValidString reports whether s conforms to the BiDi rule. | ||||
| func ValidString(s string) bool { | ||||
| 	var t Transformer | ||||
| 	if n, ok := t.advanceString(s); !ok || n < len(s) { | ||||
| 		return false | ||||
| 	} | ||||
| 	return t.isFinal() | ||||
| } | ||||
|  | ||||
| // New returns a Transformer that verifies that input adheres to the Bidi Rule. | ||||
| func New() *Transformer { | ||||
| 	return &Transformer{} | ||||
| } | ||||
|  | ||||
| // Transformer implements transform.Transform. | ||||
| type Transformer struct { | ||||
| 	state  ruleState | ||||
| 	hasRTL bool | ||||
| 	seen   uint16 | ||||
| } | ||||
|  | ||||
| // A rule can only be violated for "Bidi Domain names", meaning if one of the | ||||
| // following categories has been observed. | ||||
| func (t *Transformer) isRTL() bool { | ||||
| 	const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN | ||||
| 	return t.seen&isRTL != 0 | ||||
| } | ||||
|  | ||||
| // Reset implements transform.Transformer. | ||||
| func (t *Transformer) Reset() { *t = Transformer{} } | ||||
|  | ||||
| // Transform implements transform.Transformer. This Transformer has state and | ||||
| // needs to be reset between uses. | ||||
| func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	if len(dst) < len(src) { | ||||
| 		src = src[:len(dst)] | ||||
| 		atEOF = false | ||||
| 		err = transform.ErrShortDst | ||||
| 	} | ||||
| 	n, err1 := t.Span(src, atEOF) | ||||
| 	copy(dst, src[:n]) | ||||
| 	if err == nil || err1 != nil && err1 != transform.ErrShortSrc { | ||||
| 		err = err1 | ||||
| 	} | ||||
| 	return n, n, err | ||||
| } | ||||
|  | ||||
| // Span returns the first n bytes of src that conform to the Bidi rule. | ||||
| func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	if t.state == ruleInvalid && t.isRTL() { | ||||
| 		return 0, ErrInvalid | ||||
| 	} | ||||
| 	n, ok := t.advance(src) | ||||
| 	switch { | ||||
| 	case !ok: | ||||
| 		err = ErrInvalid | ||||
| 	case n < len(src): | ||||
| 		if !atEOF { | ||||
| 			err = transform.ErrShortSrc | ||||
| 			break | ||||
| 		} | ||||
| 		err = ErrInvalid | ||||
| 	case !t.isFinal(): | ||||
| 		err = ErrInvalid | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // Precomputing the ASCII values decreases running time for the ASCII fast path | ||||
| // by about 30%. | ||||
| var asciiTable [128]bidi.Properties | ||||
|  | ||||
| func init() { | ||||
| 	for i := range asciiTable { | ||||
| 		p, _ := bidi.LookupRune(rune(i)) | ||||
| 		asciiTable[i] = p | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (t *Transformer) advance(s []byte) (n int, ok bool) { | ||||
| 	var e bidi.Properties | ||||
| 	var sz int | ||||
| 	for n < len(s) { | ||||
| 		if s[n] < utf8.RuneSelf { | ||||
| 			e, sz = asciiTable[s[n]], 1 | ||||
| 		} else { | ||||
| 			e, sz = bidi.Lookup(s[n:]) | ||||
| 			if sz <= 1 { | ||||
| 				if sz == 1 { | ||||
| 					// We always consider invalid UTF-8 to be invalid, even if | ||||
| 					// the string has not yet been determined to be RTL. | ||||
| 					// TODO: is this correct? | ||||
| 					return n, false | ||||
| 				} | ||||
| 				return n, true // incomplete UTF-8 encoding | ||||
| 			} | ||||
| 		} | ||||
| 		// TODO: using CompactClass would result in noticeable speedup. | ||||
| 		// See unicode/bidi/prop.go:Properties.CompactClass. | ||||
| 		c := uint16(1 << e.Class()) | ||||
| 		t.seen |= c | ||||
| 		if t.seen&exclusiveRTL == exclusiveRTL { | ||||
| 			t.state = ruleInvalid | ||||
| 			return n, false | ||||
| 		} | ||||
| 		switch tr := transitions[t.state]; { | ||||
| 		case tr[0].mask&c != 0: | ||||
| 			t.state = tr[0].next | ||||
| 		case tr[1].mask&c != 0: | ||||
| 			t.state = tr[1].next | ||||
| 		default: | ||||
| 			t.state = ruleInvalid | ||||
| 			if t.isRTL() { | ||||
| 				return n, false | ||||
| 			} | ||||
| 		} | ||||
| 		n += sz | ||||
| 	} | ||||
| 	return n, true | ||||
| } | ||||
|  | ||||
| func (t *Transformer) advanceString(s string) (n int, ok bool) { | ||||
| 	var e bidi.Properties | ||||
| 	var sz int | ||||
| 	for n < len(s) { | ||||
| 		if s[n] < utf8.RuneSelf { | ||||
| 			e, sz = asciiTable[s[n]], 1 | ||||
| 		} else { | ||||
| 			e, sz = bidi.LookupString(s[n:]) | ||||
| 			if sz <= 1 { | ||||
| 				if sz == 1 { | ||||
| 					return n, false // invalid UTF-8 | ||||
| 				} | ||||
| 				return n, true // incomplete UTF-8 encoding | ||||
| 			} | ||||
| 		} | ||||
| 		// TODO: using CompactClass results in noticeable speedup. | ||||
| 		// See unicode/bidi/prop.go:Properties.CompactClass. | ||||
| 		c := uint16(1 << e.Class()) | ||||
| 		t.seen |= c | ||||
| 		if t.seen&exclusiveRTL == exclusiveRTL { | ||||
| 			t.state = ruleInvalid | ||||
| 			return n, false | ||||
| 		} | ||||
| 		switch tr := transitions[t.state]; { | ||||
| 		case tr[0].mask&c != 0: | ||||
| 			t.state = tr[0].next | ||||
| 		case tr[1].mask&c != 0: | ||||
| 			t.state = tr[1].next | ||||
| 		default: | ||||
| 			t.state = ruleInvalid | ||||
| 			if t.isRTL() { | ||||
| 				return n, false | ||||
| 			} | ||||
| 		} | ||||
| 		n += sz | ||||
| 	} | ||||
| 	return n, true | ||||
| } | ||||
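
The bidirule package added above is normally used either through the one-shot Valid/ValidString/DirectionString helpers or by driving its Transformer with the transform package. The sketch below illustrates both; it is not part of this commit, and the sample labels (a Latin label and a Hebrew label) are only illustrative inputs.

package main

import (
	"fmt"

	"golang.org/x/text/secure/bidirule"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/bidi"
)

func main() {
	// One-shot checks against the Bidi Rule (RFC 5893).
	fmt.Println(bidirule.ValidString("example")) // true: a pure-ASCII LTR label satisfies the rule
	fmt.Println(bidirule.ValidString("a\u05D0")) // false: an L character followed by an R character

	// Direction only looks for R, AL or AN characters; it does not apply the rule.
	if bidirule.DirectionString("\u05D0\u05D1") == bidi.RightToLeft {
		fmt.Println("RTL label")
	}

	// Streaming validation: the Transformer copies conforming input through
	// unchanged and reports bidirule.ErrInvalid on a violation.
	if _, _, err := transform.String(bidirule.New(), "a\u05D0"); err != nil {
		fmt.Println("rejected:", err)
	}
}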

vendor/golang.org/x/text/secure/bidirule/bidirule10.0.0.go (generated, vendored, new file, 11 lines)
							| @@ -0,0 +1,11 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| // +build go1.10 | ||||
|  | ||||
| package bidirule | ||||
|  | ||||
| func (t *Transformer) isFinal() bool { | ||||
| 	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial | ||||
| } | ||||

vendor/golang.org/x/text/secure/bidirule/bidirule9.0.0.go (generated, vendored, new file, 14 lines)
							| @@ -0,0 +1,14 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| // +build !go1.10 | ||||
|  | ||||
| package bidirule | ||||
|  | ||||
| func (t *Transformer) isFinal() bool { | ||||
| 	if !t.isRTL() { | ||||
| 		return true | ||||
| 	} | ||||
| 	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial | ||||
| } | ||||
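
The two small files above pick an isFinal implementation per Go release with build constraints: the "// +build go1.10" file is compiled on Go 1.10 and later, the "// +build !go1.10" file on older releases. A generic sketch of that pattern follows; the file, package and function names are hypothetical, not taken from this commit.

// feature_go110.go (hypothetical file): compiled only on Go 1.10 and newer.
// A sibling file carrying "// +build !go1.10" supplies the pre-1.10 variant,
// exactly as bidirule10.0.0.go and bidirule9.0.0.go do above. The constraint
// line must sit above the package clause, separated from it by a blank line.

// +build go1.10

package mypkg

// featureAvailable reports whether the Go 1.10+ code path was compiled in.
func featureAvailable() bool { return true }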

vendor/golang.org/x/text/transform/transform.go (generated, vendored, new file, 709 lines)
							| @@ -0,0 +1,709 @@ | ||||
| // Copyright 2013 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| // Package transform provides reader and writer wrappers that transform the | ||||
| // bytes passing through as well as various transformations. Example | ||||
| // transformations provided by other packages include normalization and | ||||
| // conversion between character sets. | ||||
| package transform // import "golang.org/x/text/transform" | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	// ErrShortDst means that the destination buffer was too short to | ||||
| 	// receive all of the transformed bytes. | ||||
| 	ErrShortDst = errors.New("transform: short destination buffer") | ||||
|  | ||||
| 	// ErrShortSrc means that the source buffer has insufficient data to | ||||
| 	// complete the transformation. | ||||
| 	ErrShortSrc = errors.New("transform: short source buffer") | ||||
|  | ||||
| 	// ErrEndOfSpan means that the input and output (the transformed input) | ||||
| 	// are not identical. | ||||
| 	ErrEndOfSpan = errors.New("transform: input and output are not identical") | ||||
|  | ||||
| 	// errInconsistentByteCount means that Transform returned success (nil | ||||
| 	// error) but also returned nSrc inconsistent with the src argument. | ||||
| 	errInconsistentByteCount = errors.New("transform: inconsistent byte count returned") | ||||
|  | ||||
| 	// errShortInternal means that an internal buffer is not large enough | ||||
| 	// to make progress and the Transform operation must be aborted. | ||||
| 	errShortInternal = errors.New("transform: short internal buffer") | ||||
| ) | ||||
|  | ||||
| // Transformer transforms bytes. | ||||
| type Transformer interface { | ||||
| 	// Transform writes to dst the transformed bytes read from src, and | ||||
| 	// returns the number of dst bytes written and src bytes read. The | ||||
| 	// atEOF argument tells whether src represents the last bytes of the | ||||
| 	// input. | ||||
| 	// | ||||
| 	// Callers should always process the nDst bytes produced and account | ||||
| 	// for the nSrc bytes consumed before considering the error err. | ||||
| 	// | ||||
| 	// A nil error means that all of the transformed bytes (whether freshly | ||||
| 	// transformed from src or left over from previous Transform calls) | ||||
| 	// were written to dst. A nil error can be returned regardless of | ||||
| 	// whether atEOF is true. If err is nil then nSrc must equal len(src); | ||||
| 	// the converse is not necessarily true. | ||||
| 	// | ||||
| 	// ErrShortDst means that dst was too short to receive all of the | ||||
| 	// transformed bytes. ErrShortSrc means that src had insufficient data | ||||
| 	// to complete the transformation. If both conditions apply, then | ||||
| 	// either error may be returned. Other than the error conditions listed | ||||
| 	// here, implementations are free to report other errors that arise. | ||||
| 	Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) | ||||
|  | ||||
| 	// Reset resets the state and allows a Transformer to be reused. | ||||
| 	Reset() | ||||
| } | ||||
|  | ||||
| // SpanningTransformer extends the Transformer interface with a Span method | ||||
| // that determines how much of the input already conforms to the Transformer. | ||||
| type SpanningTransformer interface { | ||||
| 	Transformer | ||||
|  | ||||
| 	// Span returns a position in src such that transforming src[:n] results in | ||||
| 	// identical output src[:n] for these bytes. It does not necessarily return | ||||
| 	// the largest such n. The atEOF argument tells whether src represents the | ||||
| 	// last bytes of the input. | ||||
| 	// | ||||
| 	// Callers should always account for the n bytes consumed before | ||||
| 	// considering the error err. | ||||
| 	// | ||||
| 	// A nil error means that all input bytes are known to be identical to the | ||||
| 	// output produced by the Transformer. A nil error can be returned | ||||
| 	// regardless of whether atEOF is true. If err is nil, then n must | ||||
| 	// equal len(src); the converse is not necessarily true. | ||||
| 	// | ||||
| 	// ErrEndOfSpan means that the Transformer output may differ from the | ||||
| 	// input after n bytes. Note that n may be len(src), meaning that the output | ||||
| 	// would contain additional bytes after otherwise identical output. | ||||
| 	// ErrShortSrc means that src had insufficient data to determine whether the | ||||
| 	// remaining bytes would change. Other than the error conditions listed | ||||
| 	// here, implementations are free to report other errors that arise. | ||||
| 	// | ||||
| 	// Calling Span can modify the Transformer state as a side effect. In | ||||
| 	// effect, it does the transformation just as calling Transform would, only | ||||
| 	// without copying to a destination buffer and only up to a point it can | ||||
| 	// determine the input and output bytes are the same. This is obviously more | ||||
| 	// limited than calling Transform, but can be more efficient in terms of | ||||
| 	// copying and allocating buffers. Calls to Span and Transform may be | ||||
| 	// interleaved. | ||||
| 	Span(src []byte, atEOF bool) (n int, err error) | ||||
| } | ||||
|  | ||||
| // NopResetter can be embedded by implementations of Transformer to add a nop | ||||
| // Reset method. | ||||
| type NopResetter struct{} | ||||
|  | ||||
| // Reset implements the Reset method of the Transformer interface. | ||||
| func (NopResetter) Reset() {} | ||||
|  | ||||
| // Reader wraps another io.Reader by transforming the bytes read. | ||||
| type Reader struct { | ||||
| 	r   io.Reader | ||||
| 	t   Transformer | ||||
| 	err error | ||||
|  | ||||
| 	// dst[dst0:dst1] contains bytes that have been transformed by t but | ||||
| 	// not yet copied out via Read. | ||||
| 	dst        []byte | ||||
| 	dst0, dst1 int | ||||
|  | ||||
| 	// src[src0:src1] contains bytes that have been read from r but not | ||||
| 	// yet transformed through t. | ||||
| 	src        []byte | ||||
| 	src0, src1 int | ||||
|  | ||||
| 	// transformComplete is whether the transformation is complete, | ||||
| 	// regardless of whether or not it was successful. | ||||
| 	transformComplete bool | ||||
| } | ||||
|  | ||||
| const defaultBufSize = 4096 | ||||
|  | ||||
| // NewReader returns a new Reader that wraps r by transforming the bytes read | ||||
| // via t. It calls Reset on t. | ||||
| func NewReader(r io.Reader, t Transformer) *Reader { | ||||
| 	t.Reset() | ||||
| 	return &Reader{ | ||||
| 		r:   r, | ||||
| 		t:   t, | ||||
| 		dst: make([]byte, defaultBufSize), | ||||
| 		src: make([]byte, defaultBufSize), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Read implements the io.Reader interface. | ||||
| func (r *Reader) Read(p []byte) (int, error) { | ||||
| 	n, err := 0, error(nil) | ||||
| 	for { | ||||
| 		// Copy out any transformed bytes and return the final error if we are done. | ||||
| 		if r.dst0 != r.dst1 { | ||||
| 			n = copy(p, r.dst[r.dst0:r.dst1]) | ||||
| 			r.dst0 += n | ||||
| 			if r.dst0 == r.dst1 && r.transformComplete { | ||||
| 				return n, r.err | ||||
| 			} | ||||
| 			return n, nil | ||||
| 		} else if r.transformComplete { | ||||
| 			return 0, r.err | ||||
| 		} | ||||
|  | ||||
| 		// Try to transform some source bytes, or to flush the transformer if we | ||||
| 		// are out of source bytes. We do this even if r.r.Read returned an error. | ||||
| 		// As the io.Reader documentation says, "process the n > 0 bytes returned | ||||
| 		// before considering the error". | ||||
| 		if r.src0 != r.src1 || r.err != nil { | ||||
| 			r.dst0 = 0 | ||||
| 			r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF) | ||||
| 			r.src0 += n | ||||
|  | ||||
| 			switch { | ||||
| 			case err == nil: | ||||
| 				if r.src0 != r.src1 { | ||||
| 					r.err = errInconsistentByteCount | ||||
| 				} | ||||
| 				// The Transform call was successful; we are complete if we | ||||
| 				// cannot read more bytes into src. | ||||
| 				r.transformComplete = r.err != nil | ||||
| 				continue | ||||
| 			case err == ErrShortDst && (r.dst1 != 0 || n != 0): | ||||
| 				// Make room in dst by copying out, and try again. | ||||
| 				continue | ||||
| 			case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil: | ||||
| 				// Read more bytes into src via the code below, and try again. | ||||
| 			default: | ||||
| 				r.transformComplete = true | ||||
| 				// The reader error (r.err) takes precedence over the | ||||
| 				// transformer error (err) unless r.err is nil or io.EOF. | ||||
| 				if r.err == nil || r.err == io.EOF { | ||||
| 					r.err = err | ||||
| 				} | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// Move any untransformed source bytes to the start of the buffer | ||||
| 		// and read more bytes. | ||||
| 		if r.src0 != 0 { | ||||
| 			r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1]) | ||||
| 		} | ||||
| 		n, r.err = r.r.Read(r.src[r.src1:]) | ||||
| 		r.src1 += n | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TODO: implement ReadByte (and ReadRune??). | ||||
|  | ||||
| // Writer wraps another io.Writer by transforming the bytes written. | ||||
| // The user needs to call Close to flush unwritten bytes that may | ||||
| // be buffered. | ||||
| type Writer struct { | ||||
| 	w   io.Writer | ||||
| 	t   Transformer | ||||
| 	dst []byte | ||||
|  | ||||
| 	// src[:n] contains bytes that have not yet passed through t. | ||||
| 	src []byte | ||||
| 	n   int | ||||
| } | ||||
|  | ||||
| // NewWriter returns a new Writer that wraps w by transforming the bytes written | ||||
| // via t. It calls Reset on t. | ||||
| func NewWriter(w io.Writer, t Transformer) *Writer { | ||||
| 	t.Reset() | ||||
| 	return &Writer{ | ||||
| 		w:   w, | ||||
| 		t:   t, | ||||
| 		dst: make([]byte, defaultBufSize), | ||||
| 		src: make([]byte, defaultBufSize), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Write implements the io.Writer interface. If there are not enough | ||||
| // bytes available to complete a Transform, the bytes will be buffered | ||||
| // for the next write. Call Close to convert the remaining bytes. | ||||
| func (w *Writer) Write(data []byte) (n int, err error) { | ||||
| 	src := data | ||||
| 	if w.n > 0 { | ||||
| 		// Append bytes from data to the last remainder. | ||||
| 		// TODO: limit the amount copied on first try. | ||||
| 		n = copy(w.src[w.n:], data) | ||||
| 		w.n += n | ||||
| 		src = w.src[:w.n] | ||||
| 	} | ||||
| 	for { | ||||
| 		nDst, nSrc, err := w.t.Transform(w.dst, src, false) | ||||
| 		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { | ||||
| 			return n, werr | ||||
| 		} | ||||
| 		src = src[nSrc:] | ||||
| 		if w.n == 0 { | ||||
| 			n += nSrc | ||||
| 		} else if len(src) <= n { | ||||
| 			// Enough bytes from w.src have been consumed. We make src point | ||||
| 			// to data instead to reduce the copying. | ||||
| 			w.n = 0 | ||||
| 			n -= len(src) | ||||
| 			src = data[n:] | ||||
| 			if n < len(data) && (err == nil || err == ErrShortSrc) { | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
| 		switch err { | ||||
| 		case ErrShortDst: | ||||
| 			// This error is okay as long as we are making progress. | ||||
| 			if nDst > 0 || nSrc > 0 { | ||||
| 				continue | ||||
| 			} | ||||
| 		case ErrShortSrc: | ||||
| 			if len(src) < len(w.src) { | ||||
| 				m := copy(w.src, src) | ||||
| 				// If w.n > 0, bytes from data were already copied to w.src and n | ||||
| 				// was already set to the number of bytes consumed. | ||||
| 				if w.n == 0 { | ||||
| 					n += m | ||||
| 				} | ||||
| 				w.n = m | ||||
| 				err = nil | ||||
| 			} else if nDst > 0 || nSrc > 0 { | ||||
| 				// Not enough buffer to store the remainder. Keep processing as | ||||
| 				// long as there is progress. Without this case, transforms that | ||||
| 				// require a lookahead larger than the buffer may result in an | ||||
| 				// error. This is not something one may expect to be common in | ||||
| 				// practice, but it may occur when buffers are set to small | ||||
| 				// sizes during testing. | ||||
| 				continue | ||||
| 			} | ||||
| 		case nil: | ||||
| 			if w.n > 0 { | ||||
| 				err = errInconsistentByteCount | ||||
| 			} | ||||
| 		} | ||||
| 		return n, err | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Close implements the io.Closer interface. | ||||
| func (w *Writer) Close() error { | ||||
| 	src := w.src[:w.n] | ||||
| 	for { | ||||
| 		nDst, nSrc, err := w.t.Transform(w.dst, src, true) | ||||
| 		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { | ||||
| 			return werr | ||||
| 		} | ||||
| 		if err != ErrShortDst { | ||||
| 			return err | ||||
| 		} | ||||
| 		src = src[nSrc:] | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type nop struct{ NopResetter } | ||||
|  | ||||
| func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	n := copy(dst, src) | ||||
| 	if n < len(src) { | ||||
| 		err = ErrShortDst | ||||
| 	} | ||||
| 	return n, n, err | ||||
| } | ||||
|  | ||||
| func (nop) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	return len(src), nil | ||||
| } | ||||
|  | ||||
| type discard struct{ NopResetter } | ||||
|  | ||||
| func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	return 0, len(src), nil | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	// Discard is a Transformer for which all Transform calls succeed | ||||
| 	// by consuming all bytes and writing nothing. | ||||
| 	Discard Transformer = discard{} | ||||
|  | ||||
| 	// Nop is a SpanningTransformer that copies src to dst. | ||||
| 	Nop SpanningTransformer = nop{} | ||||
| ) | ||||
|  | ||||
| // chain is a sequence of links. A chain with N Transformers has N+1 links and | ||||
| // N+1 buffers. Of those N+1 buffers, the first and last are the src and dst | ||||
| // buffers given to chain.Transform and the middle N-1 buffers are intermediate | ||||
| // buffers owned by the chain. The i'th link transforms bytes from the i'th | ||||
| // buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer | ||||
| // chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N). | ||||
| type chain struct { | ||||
| 	link []link | ||||
| 	err  error | ||||
| 	// errStart is the index at which the error occurred plus 1. Processing | ||||
| 	// resumes at this level at the next call to Transform. As long as | ||||
| 	// errStart > 0, chain will not consume any more source bytes. | ||||
| 	errStart int | ||||
| } | ||||
|  | ||||
| func (c *chain) fatalError(errIndex int, err error) { | ||||
| 	if i := errIndex + 1; i > c.errStart { | ||||
| 		c.errStart = i | ||||
| 		c.err = err | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type link struct { | ||||
| 	t Transformer | ||||
| 	// b[p:n] holds the bytes to be transformed by t. | ||||
| 	b []byte | ||||
| 	p int | ||||
| 	n int | ||||
| } | ||||
|  | ||||
| func (l *link) src() []byte { | ||||
| 	return l.b[l.p:l.n] | ||||
| } | ||||
|  | ||||
| func (l *link) dst() []byte { | ||||
| 	return l.b[l.n:] | ||||
| } | ||||
|  | ||||
| // Chain returns a Transformer that applies t in sequence. | ||||
| func Chain(t ...Transformer) Transformer { | ||||
| 	if len(t) == 0 { | ||||
| 		return nop{} | ||||
| 	} | ||||
| 	c := &chain{link: make([]link, len(t)+1)} | ||||
| 	for i, tt := range t { | ||||
| 		c.link[i].t = tt | ||||
| 	} | ||||
| 	// Allocate intermediate buffers. | ||||
| 	b := make([][defaultBufSize]byte, len(t)-1) | ||||
| 	for i := range b { | ||||
| 		c.link[i+1].b = b[i][:] | ||||
| 	} | ||||
| 	return c | ||||
| } | ||||
|  | ||||
| // Reset resets the state of Chain. It calls Reset on all the Transformers. | ||||
| func (c *chain) Reset() { | ||||
| 	for i, l := range c.link { | ||||
| 		if l.t != nil { | ||||
| 			l.t.Reset() | ||||
| 		} | ||||
| 		c.link[i].p, c.link[i].n = 0, 0 | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TODO: make chain use Span (is going to be fun to implement!) | ||||
|  | ||||
| // Transform applies the transformers of c in sequence. | ||||
| func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	// Set up src and dst in the chain. | ||||
| 	srcL := &c.link[0] | ||||
| 	dstL := &c.link[len(c.link)-1] | ||||
| 	srcL.b, srcL.p, srcL.n = src, 0, len(src) | ||||
| 	dstL.b, dstL.n = dst, 0 | ||||
| 	var lastFull, needProgress bool // for detecting progress | ||||
|  | ||||
| 	// i is the index of the next Transformer to apply, for i in [low, high]. | ||||
| 	// low is the lowest index for which c.link[low] may still produce bytes. | ||||
| 	// high is the highest index for which c.link[high] has a Transformer. | ||||
| 	// The error returned by Transform determines whether to increase or | ||||
| 	// decrease i. We try to completely fill a buffer before converting it. | ||||
| 	for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; { | ||||
| 		in, out := &c.link[i], &c.link[i+1] | ||||
| 		nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i) | ||||
| 		out.n += nDst | ||||
| 		in.p += nSrc | ||||
| 		if i > 0 && in.p == in.n { | ||||
| 			in.p, in.n = 0, 0 | ||||
| 		} | ||||
| 		needProgress, lastFull = lastFull, false | ||||
| 		switch err0 { | ||||
| 		case ErrShortDst: | ||||
| 			// Process the destination buffer next. Return if we are already | ||||
| 			// at the high index. | ||||
| 			if i == high { | ||||
| 				return dstL.n, srcL.p, ErrShortDst | ||||
| 			} | ||||
| 			if out.n != 0 { | ||||
| 				i++ | ||||
| 				// If the Transformer at the next index is not able to process any | ||||
| 				// source bytes there is nothing that can be done to make progress | ||||
| 				// and the bytes will remain unprocessed. lastFull is used to | ||||
| 				// detect this and break out of the loop with a fatal error. | ||||
| 				lastFull = true | ||||
| 				continue | ||||
| 			} | ||||
| 			// The destination buffer was too small, but is completely empty. | ||||
| 			// Return a fatal error as this transformation can never complete. | ||||
| 			c.fatalError(i, errShortInternal) | ||||
| 		case ErrShortSrc: | ||||
| 			if i == 0 { | ||||
| 				// Save ErrShortSrc in err. All other errors take precedence. | ||||
| 				err = ErrShortSrc | ||||
| 				break | ||||
| 			} | ||||
| 			// Source bytes were depleted before filling up the destination buffer. | ||||
| 			// Verify we made some progress, move the remaining bytes to the errStart | ||||
| 			// and try to get more source bytes. | ||||
| 			if needProgress && nSrc == 0 || in.n-in.p == len(in.b) { | ||||
| 				// There were not enough source bytes to proceed while the source | ||||
| 				// buffer cannot hold any more bytes. Return a fatal error as this | ||||
| 				// transformation can never complete. | ||||
| 				c.fatalError(i, errShortInternal) | ||||
| 				break | ||||
| 			} | ||||
| 			// in.b is an internal buffer and we can make progress. | ||||
| 			in.p, in.n = 0, copy(in.b, in.src()) | ||||
| 			fallthrough | ||||
| 		case nil: | ||||
| 			// if i == low, we have depleted the bytes at index i or any lower levels. | ||||
| 			// In that case we increase low and i. In all other cases we decrease i to | ||||
| 			// fetch more bytes before proceeding to the next index. | ||||
| 			if i > low { | ||||
| 				i-- | ||||
| 				continue | ||||
| 			} | ||||
| 		default: | ||||
| 			c.fatalError(i, err0) | ||||
| 		} | ||||
| 		// Exhausted level low or fatal error: increase low and continue | ||||
| 		// to process the bytes accepted so far. | ||||
| 		i++ | ||||
| 		low = i | ||||
| 	} | ||||
|  | ||||
| 	// If c.errStart > 0, this means we found a fatal error.  We will clear | ||||
| 	// all upstream buffers. At this point, no more progress can be made | ||||
| 	// downstream, as Transform would have bailed while handling ErrShortDst. | ||||
| 	if c.errStart > 0 { | ||||
| 		for i := 1; i < c.errStart; i++ { | ||||
| 			c.link[i].p, c.link[i].n = 0, 0 | ||||
| 		} | ||||
| 		err, c.errStart, c.err = c.err, 0, nil | ||||
| 	} | ||||
| 	return dstL.n, srcL.p, err | ||||
| } | ||||
|  | ||||
| // Deprecated: Use runes.Remove instead. | ||||
| func RemoveFunc(f func(r rune) bool) Transformer { | ||||
| 	return removeF(f) | ||||
| } | ||||
|  | ||||
| type removeF func(r rune) bool | ||||
|  | ||||
| func (removeF) Reset() {} | ||||
|  | ||||
| // Transform implements the Transformer interface. | ||||
| func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] { | ||||
|  | ||||
| 		if r = rune(src[0]); r < utf8.RuneSelf { | ||||
| 			sz = 1 | ||||
| 		} else { | ||||
| 			r, sz = utf8.DecodeRune(src) | ||||
|  | ||||
| 			if sz == 1 { | ||||
| 				// Invalid rune. | ||||
| 				if !atEOF && !utf8.FullRune(src) { | ||||
| 					err = ErrShortSrc | ||||
| 					break | ||||
| 				} | ||||
| 				// We replace illegal bytes with RuneError. Not doing so might | ||||
| 				// otherwise turn a sequence of invalid UTF-8 into valid UTF-8. | ||||
| 				// The resulting byte sequence may subsequently contain runes | ||||
| 				// for which t(r) is true that were passed unnoticed. | ||||
| 				if !t(r) { | ||||
| 					if nDst+3 > len(dst) { | ||||
| 						err = ErrShortDst | ||||
| 						break | ||||
| 					} | ||||
| 					nDst += copy(dst[nDst:], "\uFFFD") | ||||
| 				} | ||||
| 				nSrc++ | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		if !t(r) { | ||||
| 			if nDst+sz > len(dst) { | ||||
| 				err = ErrShortDst | ||||
| 				break | ||||
| 			} | ||||
| 			nDst += copy(dst[nDst:], src[:sz]) | ||||
| 		} | ||||
| 		nSrc += sz | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // grow returns a new []byte that is longer than b, and copies the first n bytes | ||||
| // of b to the start of the new slice. | ||||
| func grow(b []byte, n int) []byte { | ||||
| 	m := len(b) | ||||
| 	if m <= 32 { | ||||
| 		m = 64 | ||||
| 	} else if m <= 256 { | ||||
| 		m *= 2 | ||||
| 	} else { | ||||
| 		m += m >> 1 | ||||
| 	} | ||||
| 	buf := make([]byte, m) | ||||
| 	copy(buf, b[:n]) | ||||
| 	return buf | ||||
| } | ||||
|  | ||||
| const initialBufSize = 128 | ||||
|  | ||||
| // String returns a string with the result of converting s[:n] using t, where | ||||
| // n <= len(s). If err == nil, n will be len(s). It calls Reset on t. | ||||
| func String(t Transformer, s string) (result string, n int, err error) { | ||||
| 	t.Reset() | ||||
| 	if s == "" { | ||||
| 		// Fast path for the common case of empty input. Results in about an | ||||
| 		// 86% reduction of running time for BenchmarkStringLowerEmpty. | ||||
| 		if _, _, err := t.Transform(nil, nil, true); err == nil { | ||||
| 			return "", 0, nil | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Allocate only once. Note that both dst and src escape when passed to | ||||
| 	// Transform. | ||||
| 	buf := [2 * initialBufSize]byte{} | ||||
| 	dst := buf[:initialBufSize:initialBufSize] | ||||
| 	src := buf[initialBufSize : 2*initialBufSize] | ||||
|  | ||||
| 	// The input string s is transformed in multiple chunks (starting with a | ||||
| 	// chunk size of initialBufSize). nDst and nSrc are per-chunk (or | ||||
| 	// per-Transform-call) indexes, pDst and pSrc are overall indexes. | ||||
| 	nDst, nSrc := 0, 0 | ||||
| 	pDst, pSrc := 0, 0 | ||||
|  | ||||
| 	// pPrefix is the length of a common prefix: the first pPrefix bytes of the | ||||
| 	// result will equal the first pPrefix bytes of s. It is not guaranteed to | ||||
| 	// be the largest such value, but if pPrefix, len(result) and len(s) are | ||||
| 	// all equal after the final transform (i.e. calling Transform with atEOF | ||||
| 	// being true returned nil error) then we don't need to allocate a new | ||||
| 	// result string. | ||||
| 	pPrefix := 0 | ||||
| 	for { | ||||
| 		// Invariant: pDst == pPrefix && pSrc == pPrefix. | ||||
|  | ||||
| 		n := copy(src, s[pSrc:]) | ||||
| 		nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s)) | ||||
| 		pDst += nDst | ||||
| 		pSrc += nSrc | ||||
|  | ||||
| 		// TODO:  let transformers implement an optional Spanner interface, akin | ||||
| 		// to norm's QuickSpan. This would even allow us to avoid any allocation. | ||||
| 		if !bytes.Equal(dst[:nDst], src[:nSrc]) { | ||||
| 			break | ||||
| 		} | ||||
| 		pPrefix = pSrc | ||||
| 		if err == ErrShortDst { | ||||
| 			// A buffer can only be short if a transformer modifies its input. | ||||
| 			break | ||||
| 		} else if err == ErrShortSrc { | ||||
| 			if nSrc == 0 { | ||||
| 				// No progress was made. | ||||
| 				break | ||||
| 			} | ||||
| 			// Equal so far and !atEOF, so continue checking. | ||||
| 		} else if err != nil || pPrefix == len(s) { | ||||
| 			return string(s[:pPrefix]), pPrefix, err | ||||
| 		} | ||||
| 	} | ||||
| 	// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc. | ||||
|  | ||||
| 	// We have transformed the first pSrc bytes of the input s to become pDst | ||||
| 	// transformed bytes. Those transformed bytes are discontiguous: the first | ||||
| 	// pPrefix of them equal s[:pPrefix] and the last nDst of them equal | ||||
| 	// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so | ||||
| 	// that they become one contiguous slice: dst[:pDst]. | ||||
| 	if pPrefix != 0 { | ||||
| 		newDst := dst | ||||
| 		if pDst > len(newDst) { | ||||
| 			newDst = make([]byte, len(s)+nDst-nSrc) | ||||
| 		} | ||||
| 		copy(newDst[pPrefix:pDst], dst[:nDst]) | ||||
| 		copy(newDst[:pPrefix], s[:pPrefix]) | ||||
| 		dst = newDst | ||||
| 	} | ||||
|  | ||||
| 	// Prevent duplicate Transform calls with atEOF being true at the end of | ||||
| 	// the input. Also return if we have an unrecoverable error. | ||||
| 	if (err == nil && pSrc == len(s)) || | ||||
| 		(err != nil && err != ErrShortDst && err != ErrShortSrc) { | ||||
| 		return string(dst[:pDst]), pSrc, err | ||||
| 	} | ||||
|  | ||||
| 	// Transform the remaining input, growing dst and src buffers as necessary. | ||||
| 	for { | ||||
| 		n := copy(src, s[pSrc:]) | ||||
| 		atEOF := pSrc+n == len(s) | ||||
| 		nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], atEOF) | ||||
| 		pDst += nDst | ||||
| 		pSrc += nSrc | ||||
|  | ||||
| 		// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can | ||||
| 		// make progress. This may avoid excessive allocations. | ||||
| 		if err == ErrShortDst { | ||||
| 			if nDst == 0 { | ||||
| 				dst = grow(dst, pDst) | ||||
| 			} | ||||
| 		} else if err == ErrShortSrc { | ||||
| 			if atEOF { | ||||
| 				return string(dst[:pDst]), pSrc, err | ||||
| 			} | ||||
| 			if nSrc == 0 { | ||||
| 				src = grow(src, 0) | ||||
| 			} | ||||
| 		} else if err != nil || pSrc == len(s) { | ||||
| 			return string(dst[:pDst]), pSrc, err | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Bytes returns a new byte slice with the result of converting b[:n] using t, | ||||
| // where n <= len(b). If err == nil, n will be len(b). It calls Reset on t. | ||||
| func Bytes(t Transformer, b []byte) (result []byte, n int, err error) { | ||||
| 	return doAppend(t, 0, make([]byte, len(b)), b) | ||||
| } | ||||
|  | ||||
| // Append appends the result of converting src[:n] using t to dst, where | ||||
| // n <= len(src). If err == nil, n will be len(src). It calls Reset on t. | ||||
| func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) { | ||||
| 	if len(dst) == cap(dst) { | ||||
| 		n := len(src) + len(dst) // It is okay for this to be 0. | ||||
| 		b := make([]byte, n) | ||||
| 		dst = b[:copy(b, dst)] | ||||
| 	} | ||||
| 	return doAppend(t, len(dst), dst[:cap(dst)], src) | ||||
| } | ||||
|  | ||||
| func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) { | ||||
| 	t.Reset() | ||||
| 	pSrc := 0 | ||||
| 	for { | ||||
| 		nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true) | ||||
| 		pDst += nDst | ||||
| 		pSrc += nSrc | ||||
| 		if err != ErrShortDst { | ||||
| 			return dst[:pDst], pSrc, err | ||||
| 		} | ||||
|  | ||||
| 		// Grow the destination buffer, but do not grow as long as we can make | ||||
| 		// progress. This may avoid excessive allocations. | ||||
| 		if nDst == 0 { | ||||
| 			dst = grow(dst, pDst) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
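
transform.go above defines the Transformer contract (report nDst and nSrc, signal ErrShortDst/ErrShortSrc) together with helpers such as String, Bytes, NewReader, NewWriter and Chain. The sketch below implements a minimal Transformer against that contract and drives it through String and NewReader; upperASCII and the sample inputs are illustrative, not part of the vendored package.

package main

import (
	"fmt"
	"io"
	"os"
	"strings"

	"golang.org/x/text/transform"
)

// upperASCII upper-cases ASCII letters byte by byte. Bytes >= 0x80 are passed
// through untouched, so multi-byte UTF-8 sequences are preserved. It needs no
// lookahead, so the only error it ever reports is ErrShortDst when dst fills up.
type upperASCII struct{ transform.NopResetter }

func (upperASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	n := len(src)
	if n > len(dst) {
		n, err = len(dst), transform.ErrShortDst
	}
	for i := 0; i < n; i++ {
		c := src[i]
		if 'a' <= c && c <= 'z' {
			c -= 'a' - 'A'
		}
		dst[i] = c
	}
	return n, n, err
}

func main() {
	// One-shot conversion; String calls Reset on the Transformer first.
	s, _, _ := transform.String(upperASCII{}, "hello, transform")
	fmt.Println(s) // HELLO, TRANSFORM

	// Streaming conversion; Chain composes transformers left to right.
	r := transform.NewReader(strings.NewReader("chained input\n"),
		transform.Chain(transform.Nop, upperASCII{}))
	io.Copy(os.Stdout, r)
}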

vendor/golang.org/x/text/unicode/bidi/bidi.go (generated, vendored, new file, 359 lines)
							| @@ -0,0 +1,359 @@ | ||||
| // Copyright 2015 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| //go:generate go run gen.go gen_trieval.go gen_ranges.go | ||||
|  | ||||
| // Package bidi contains functionality for bidirectional text support. | ||||
| // | ||||
| // See https://www.unicode.org/reports/tr9. | ||||
| // | ||||
| // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways | ||||
| // and without notice. | ||||
| package bidi // import "golang.org/x/text/unicode/bidi" | ||||
|  | ||||
| // TODO | ||||
| // - Transformer for reordering? | ||||
| // - Transformer (validator, really) for Bidi Rule. | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| ) | ||||
|  | ||||
| // This API tries to avoid dealing with embedding levels for now. Under the hood | ||||
| // these will be computed, but the question is to which extent the user should | ||||
| // know they exist. We should at some point allow the user to specify an | ||||
| // embedding hierarchy, though. | ||||
|  | ||||
| // A Direction indicates the overall flow of text. | ||||
| type Direction int | ||||
|  | ||||
| const ( | ||||
| 	// LeftToRight indicates the text contains no right-to-left characters and | ||||
| 	// that either there are some left-to-right characters or the option | ||||
| 	// DefaultDirection(LeftToRight) was passed. | ||||
| 	LeftToRight Direction = iota | ||||
|  | ||||
| 	// RightToLeft indicates the text contains no left-to-right characters and | ||||
| 	// that either there are some right-to-left characters or the option | ||||
| 	// DefaultDirection(RightToLeft) was passed. | ||||
| 	RightToLeft | ||||
|  | ||||
| 	// Mixed indicates text contains both left-to-right and right-to-left | ||||
| 	// characters. | ||||
| 	Mixed | ||||
|  | ||||
| 	// Neutral means that text contains no left-to-right and right-to-left | ||||
| 	// characters and that no default direction has been set. | ||||
| 	Neutral | ||||
| ) | ||||
|  | ||||
| type options struct { | ||||
| 	defaultDirection Direction | ||||
| } | ||||
|  | ||||
| // An Option is an option for Bidi processing. | ||||
| type Option func(*options) | ||||
|  | ||||
| // ICU allows the user to define embedding levels. This may be used, for example, | ||||
| // to use hierarchical structure of markup languages to define embeddings. | ||||
| // The following option may be a way to expose this functionality in this API. | ||||
| // // LevelFunc sets a function that associates nesting levels with the given text. | ||||
| // // The levels function will be called with monotonically increasing values for p. | ||||
| // func LevelFunc(levels func(p int) int) Option { | ||||
| // 	panic("unimplemented") | ||||
| // } | ||||
|  | ||||
| // DefaultDirection sets the default direction for a Paragraph. The direction is | ||||
| // overridden if the text contains directional characters. | ||||
| func DefaultDirection(d Direction) Option { | ||||
| 	return func(opts *options) { | ||||
| 		opts.defaultDirection = d | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // A Paragraph holds a single Paragraph for Bidi processing. | ||||
| type Paragraph struct { | ||||
| 	p          []byte | ||||
| 	o          Ordering | ||||
| 	opts       []Option | ||||
| 	types      []Class | ||||
| 	pairTypes  []bracketType | ||||
| 	pairValues []rune | ||||
| 	runes      []rune | ||||
| 	options    options | ||||
| } | ||||
|  | ||||
| // Initialize the p.pairTypes, p.pairValues and p.types from the input previously | ||||
| // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph | ||||
| // separator (bidi class B). | ||||
| // | ||||
| // The function p.Order() needs these values to be set, so this preparation could be postponed. | ||||
| // But since the SetBytes and SetString functions return the length of the input up to the paragraph | ||||
| // separator, the whole input needs to be processed anyway and should not be done twice. | ||||
| // | ||||
| // The function has the same return values as SetBytes() / SetString() | ||||
| func (p *Paragraph) prepareInput() (n int, err error) { | ||||
| 	p.runes = bytes.Runes(p.p) | ||||
| 	bytecount := 0 | ||||
| 	// clear slices from previous SetString or SetBytes | ||||
| 	p.pairTypes = nil | ||||
| 	p.pairValues = nil | ||||
| 	p.types = nil | ||||
|  | ||||
| 	for _, r := range p.runes { | ||||
| 		props, i := LookupRune(r) | ||||
| 		bytecount += i | ||||
| 		cls := props.Class() | ||||
| 		if cls == B { | ||||
| 			return bytecount, nil | ||||
| 		} | ||||
| 		p.types = append(p.types, cls) | ||||
| 		if props.IsOpeningBracket() { | ||||
| 			p.pairTypes = append(p.pairTypes, bpOpen) | ||||
| 			p.pairValues = append(p.pairValues, r) | ||||
| 		} else if props.IsBracket() { | ||||
| 			// this must be a closing bracket, | ||||
| 			// since IsOpeningBracket is not true | ||||
| 			p.pairTypes = append(p.pairTypes, bpClose) | ||||
| 			p.pairValues = append(p.pairValues, r) | ||||
| 		} else { | ||||
| 			p.pairTypes = append(p.pairTypes, bpNone) | ||||
| 			p.pairValues = append(p.pairValues, 0) | ||||
| 		} | ||||
| 	} | ||||
| 	return bytecount, nil | ||||
| } | ||||
|  | ||||
| // SetBytes configures p for the given paragraph text. It replaces text | ||||
| // previously set by SetBytes or SetString. If b contains a paragraph separator | ||||
| // it will only process the first paragraph and report the number of bytes | ||||
| // consumed from b including this separator. Error may be non-nil if options are | ||||
| // given. | ||||
| func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { | ||||
| 	p.p = b | ||||
| 	p.opts = opts | ||||
| 	return p.prepareInput() | ||||
| } | ||||
|  | ||||
| // SetString configures s for the given paragraph text. It replaces text | ||||
| // previously set by SetBytes or SetString. If s contains a paragraph separator | ||||
| // it will only process the first paragraph and report the number of bytes | ||||
| // consumed from s including this separator. Error may be non-nil if options are | ||||
| // given. | ||||
| func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { | ||||
| 	p.p = []byte(s) | ||||
| 	p.opts = opts | ||||
| 	return p.prepareInput() | ||||
| } | ||||
|  | ||||
| // IsLeftToRight reports whether the principal direction of rendering for this | ||||
| // paragraph is left-to-right. If it returns false, the principal direction | ||||
| // of rendering is right-to-left. | ||||
| func (p *Paragraph) IsLeftToRight() bool { | ||||
| 	return p.Direction() == LeftToRight | ||||
| } | ||||
|  | ||||
| // Direction returns the direction of the text of this paragraph. | ||||
| // | ||||
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. | ||||
| func (p *Paragraph) Direction() Direction { | ||||
| 	return p.o.Direction() | ||||
| } | ||||
|  | ||||
| // TODO: what happens if the position is > len(input)? This should return an error. | ||||
|  | ||||
| // RunAt reports the Run at the given position of the input text. | ||||
| // | ||||
| // This method can be used for computing line breaks on paragraphs. | ||||
| func (p *Paragraph) RunAt(pos int) Run { | ||||
| 	c := 0 | ||||
| 	runNumber := 0 | ||||
| 	for i, r := range p.o.runes { | ||||
| 		c += len(r) | ||||
| 		if pos < c { | ||||
| 			runNumber = i | ||||
| 			// Stop at the first run that contains pos; without this break the | ||||
| 			// loop would continue and report the last run instead. | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	return p.o.Run(runNumber) | ||||
| } | ||||
|  | ||||
| func calculateOrdering(levels []level, runes []rune) Ordering { | ||||
| 	var curDir Direction | ||||
|  | ||||
| 	prevDir := Neutral | ||||
| 	prevI := 0 | ||||
|  | ||||
| 	o := Ordering{} | ||||
| 	// lvl = 0,2,4,...: left to right | ||||
| 	// lvl = 1,3,5,...: right to left | ||||
| 	for i, lvl := range levels { | ||||
| 		if lvl%2 == 0 { | ||||
| 			curDir = LeftToRight | ||||
| 		} else { | ||||
| 			curDir = RightToLeft | ||||
| 		} | ||||
| 		if curDir != prevDir { | ||||
| 			if i > 0 { | ||||
| 				o.runes = append(o.runes, runes[prevI:i]) | ||||
| 				o.directions = append(o.directions, prevDir) | ||||
| 				o.startpos = append(o.startpos, prevI) | ||||
| 			} | ||||
| 			prevI = i | ||||
| 			prevDir = curDir | ||||
| 		} | ||||
| 	} | ||||
| 	o.runes = append(o.runes, runes[prevI:]) | ||||
| 	o.directions = append(o.directions, prevDir) | ||||
| 	o.startpos = append(o.startpos, prevI) | ||||
| 	return o | ||||
| } | ||||
|  | ||||
| // Order computes the visual ordering of all the runs in a Paragraph. | ||||
| func (p *Paragraph) Order() (Ordering, error) { | ||||
| 	if len(p.types) == 0 { | ||||
| 		return Ordering{}, nil | ||||
| 	} | ||||
|  | ||||
| 	for _, fn := range p.opts { | ||||
| 		fn(&p.options) | ||||
| 	} | ||||
| 	lvl := level(-1) | ||||
| 	if p.options.defaultDirection == RightToLeft { | ||||
| 		lvl = 1 | ||||
| 	} | ||||
| 	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl) | ||||
| 	if err != nil { | ||||
| 		return Ordering{}, err | ||||
| 	} | ||||
|  | ||||
| 	levels := para.getLevels([]int{len(p.types)}) | ||||
|  | ||||
| 	p.o = calculateOrdering(levels, p.runes) | ||||
| 	return p.o, nil | ||||
| } | ||||
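|  | ||||
| // Editor's note: an illustrative sketch, not part of the vendored source, of | ||||
| // the typical SetString / Order / Run flow, using only identifiers defined in | ||||
| // this file; error handling is abbreviated. | ||||
| // | ||||
| //	var p Paragraph | ||||
| //	if _, err := p.SetString("english עברית english"); err != nil { | ||||
| //		// handle error | ||||
| //	} | ||||
| //	o, err := p.Order() | ||||
| //	if err != nil { | ||||
| //		// handle error | ||||
| //	} | ||||
| //	for i := 0; i < o.NumRuns(); i++ { | ||||
| //		run := o.Run(i) | ||||
| //		start, end := run.Pos() | ||||
| //		_, _, _ = run.Direction(), start, end // direction and rune span of each run | ||||
| //	} | ||||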
|  | ||||
| // Line computes the visual ordering of runs for a single line starting and | ||||
| // ending at the given positions in the original text. | ||||
| func (p *Paragraph) Line(start, end int) (Ordering, error) { | ||||
| 	lineTypes := p.types[start:end] | ||||
| 	para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) | ||||
| 	if err != nil { | ||||
| 		return Ordering{}, err | ||||
| 	} | ||||
| 	levels := para.getLevels([]int{len(lineTypes)}) | ||||
| 	o := calculateOrdering(levels, p.runes[start:end]) | ||||
| 	return o, nil | ||||
| } | ||||
|  | ||||
| // An Ordering holds the computed visual order of runs of a Paragraph. Calling | ||||
| // SetBytes or SetString on the originating Paragraph invalidates an Ordering. | ||||
| // The methods of an Ordering should only be called by one goroutine at a time. | ||||
| type Ordering struct { | ||||
| 	runes      [][]rune | ||||
| 	directions []Direction | ||||
| 	startpos   []int | ||||
| } | ||||
|  | ||||
| // Direction reports the directionality of the runs. | ||||
| // | ||||
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. | ||||
| func (o *Ordering) Direction() Direction { | ||||
| 	return o.directions[0] | ||||
| } | ||||
|  | ||||
| // NumRuns returns the number of runs. | ||||
| func (o *Ordering) NumRuns() int { | ||||
| 	return len(o.runes) | ||||
| } | ||||
|  | ||||
| // Run returns the ith run within the ordering. | ||||
| func (o *Ordering) Run(i int) Run { | ||||
| 	r := Run{ | ||||
| 		runes:     o.runes[i], | ||||
| 		direction: o.directions[i], | ||||
| 		startpos:  o.startpos[i], | ||||
| 	} | ||||
| 	return r | ||||
| } | ||||
|  | ||||
| // TODO: perhaps with options. | ||||
| // // Reorder creates a reader that reads the runes in visual order per character. | ||||
| // // Modifiers remain after the runes they modify. | ||||
| // func (l *Runs) Reorder() io.Reader { | ||||
| // 	panic("unimplemented") | ||||
| // } | ||||
|  | ||||
| // A Run is a continuous sequence of characters of a single direction. | ||||
| type Run struct { | ||||
| 	runes     []rune | ||||
| 	direction Direction | ||||
| 	startpos  int | ||||
| } | ||||
|  | ||||
| // String returns the text of the run in its original order. | ||||
| func (r *Run) String() string { | ||||
| 	return string(r.runes) | ||||
| } | ||||
|  | ||||
| // Bytes returns the text of the run in its original order. | ||||
| func (r *Run) Bytes() []byte { | ||||
| 	return []byte(r.String()) | ||||
| } | ||||
|  | ||||
| // TODO: methods for | ||||
| // - Display order | ||||
| // - headers and footers | ||||
| // - bracket replacement. | ||||
|  | ||||
| // Direction reports the direction of the run. | ||||
| func (r *Run) Direction() Direction { | ||||
| 	return r.direction | ||||
| } | ||||
|  | ||||
| // Pos returns the (inclusive) start and end rune positions of the Run within | ||||
| // the text passed to SetBytes or SetString of the originating Paragraph value. | ||||
| func (r *Run) Pos() (start, end int) { | ||||
| 	return r.startpos, r.startpos + len(r.runes) - 1 | ||||
| } | ||||
|  | ||||
| // AppendReverse reverses the order of characters of in, appends them to out, | ||||
| // and returns the result. Modifiers will still follow the runes they modify. | ||||
| // Brackets are replaced with their counterparts. | ||||
| func AppendReverse(out, in []byte) []byte { | ||||
| 	ret := make([]byte, len(in)+len(out)) | ||||
| 	copy(ret, out) | ||||
| 	inRunes := bytes.Runes(in) | ||||
|  | ||||
| 	for i, r := range inRunes { | ||||
| 		prop, _ := LookupRune(r) | ||||
| 		if prop.IsBracket() { | ||||
| 			inRunes[i] = prop.reverseBracket(r) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 { | ||||
| 		inRunes[i], inRunes[j] = inRunes[j], inRunes[i] | ||||
| 	} | ||||
| 	copy(ret[len(out):], string(inRunes)) | ||||
|  | ||||
| 	return ret | ||||
| } | ||||
|  | ||||
| // ReverseString reverses the order of characters in s and returns a new string. | ||||
| // Modifiers will still follow the runes they modify. Brackets are replaced with | ||||
| // their counterparts. | ||||
| func ReverseString(s string) string { | ||||
| 	input := []rune(s) | ||||
| 	li := len(input) | ||||
| 	ret := make([]rune, li) | ||||
| 	for i, r := range input { | ||||
| 		prop, _ := LookupRune(r) | ||||
| 		if prop.IsBracket() { | ||||
| 			ret[li-i-1] = prop.reverseBracket(r) | ||||
| 		} else { | ||||
| 			ret[li-i-1] = r | ||||
| 		} | ||||
| 	} | ||||
| 	return string(ret) | ||||
| } | ||||
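|  | ||||
| // Editor's note: a small sketch, not part of the vendored source, of the | ||||
| // bracket mirroring performed by ReverseString and AppendReverse: | ||||
| // | ||||
| //	ReverseString("a(b)c")              // "c(b)a": brackets are mirrored | ||||
| //	AppendReverse(nil, []byte("ab(c)")) // []byte("(c)ba") | ||||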
							
								
								
									
335  vendor/golang.org/x/text/unicode/bidi/bracket.go  (generated, vendored, new file)
							| @@ -0,0 +1,335 @@ | ||||
| // Copyright 2015 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package bidi | ||||
|  | ||||
| import ( | ||||
| 	"container/list" | ||||
| 	"fmt" | ||||
| 	"sort" | ||||
| ) | ||||
|  | ||||
| // This file contains a port of the reference implementation of the | ||||
| // Bidi Parentheses Algorithm: | ||||
| // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java | ||||
| // | ||||
| // The implementation in this file covers definitions BD14-BD16 and rule N0 | ||||
| // of UAX#9. | ||||
| // | ||||
| // Some preprocessing is done for each rune before data is passed to this | ||||
| // algorithm: | ||||
| //  - opening and closing brackets are identified | ||||
| //  - a bracket pair type, like '(' and ')', is assigned a unique identifier that | ||||
| //    is identical for the opening and closing bracket. It is left to the caller | ||||
| //    to do these mappings. | ||||
| //  - The BPA algorithm requires that bracket characters that are canonical | ||||
| //    equivalents of each other be able to be substituted for each other. | ||||
| //    It is the responsibility of the caller to do this canonicalization. | ||||
| // | ||||
| // In implementing BD16, this implementation departs slightly from the "logical" | ||||
| // algorithm defined in UAX#9. In particular, the stack referenced there | ||||
| // supports operations that go beyond a "basic" stack. An equivalent | ||||
| // implementation based on a linked list is used here. | ||||
|  | ||||
| // Bidi_Paired_Bracket_Type | ||||
| // BD14. An opening paired bracket is a character whose | ||||
| // Bidi_Paired_Bracket_Type property value is Open. | ||||
| // | ||||
| // BD15. A closing paired bracket is a character whose | ||||
| // Bidi_Paired_Bracket_Type property value is Close. | ||||
| type bracketType byte | ||||
|  | ||||
| const ( | ||||
| 	bpNone bracketType = iota | ||||
| 	bpOpen | ||||
| 	bpClose | ||||
| ) | ||||
|  | ||||
| // bracketPair holds the index values of the opening and closing brackets of a | ||||
| // single bracket pair. | ||||
| type bracketPair struct { | ||||
| 	opener int | ||||
| 	closer int | ||||
| } | ||||
|  | ||||
| func (b *bracketPair) String() string { | ||||
| 	return fmt.Sprintf("(%v, %v)", b.opener, b.closer) | ||||
| } | ||||
|  | ||||
| // bracketPairs is a slice of bracketPairs with a sort.Interface implementation. | ||||
| type bracketPairs []bracketPair | ||||
|  | ||||
| func (b bracketPairs) Len() int           { return len(b) } | ||||
| func (b bracketPairs) Swap(i, j int)      { b[i], b[j] = b[j], b[i] } | ||||
| func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener } | ||||
|  | ||||
| // resolvePairedBrackets runs the paired bracket part of the UBA algorithm. | ||||
| // | ||||
| // For each rune, it takes the index into the original string, the class, the | ||||
| // bracket type (in pairTypes), and the bracket identifier (in pairValues). It | ||||
| // also takes the direction type for the start-of-sequence (sos) and the | ||||
| // embedding level. | ||||
| // | ||||
| // The identifiers for bracket types are the rune of the canonicalized opening | ||||
| // bracket for brackets (open or close) or 0 for runes that are not brackets. | ||||
| func resolvePairedBrackets(s *isolatingRunSequence) { | ||||
| 	p := bracketPairer{ | ||||
| 		sos:              s.sos, | ||||
| 		openers:          list.New(), | ||||
| 		codesIsolatedRun: s.types, | ||||
| 		indexes:          s.indexes, | ||||
| 	} | ||||
| 	dirEmbed := L | ||||
| 	if s.level&1 != 0 { | ||||
| 		dirEmbed = R | ||||
| 	} | ||||
| 	p.locateBrackets(s.p.pairTypes, s.p.pairValues) | ||||
| 	p.resolveBrackets(dirEmbed, s.p.initialTypes) | ||||
| } | ||||
|  | ||||
| type bracketPairer struct { | ||||
| 	sos Class // direction corresponding to start of sequence | ||||
|  | ||||
| 	// The following is a restatement of BD 16 using non-algorithmic language. | ||||
| 	// | ||||
| 	// A bracket pair is a pair of characters consisting of an opening | ||||
| 	// paired bracket and a closing paired bracket such that the | ||||
| 	// Bidi_Paired_Bracket property value of the former equals the latter, | ||||
| 	// subject to the following constraints. | ||||
| 	// - both characters of a pair occur in the same isolating run sequence | ||||
| 	// - the closing character of a pair follows the opening character | ||||
| 	// - any bracket character can belong at most to one pair, the earliest possible one | ||||
| 	// - any bracket character not part of a pair is treated like an ordinary character | ||||
| 	// - pairs may nest properly, but their spans may not overlap otherwise | ||||
|  | ||||
| 	// Bracket characters with canonical decompositions are supposed to be | ||||
| 	// treated as if they had been normalized, to allow normalized and non- | ||||
| 	// normalized text to give the same result. In this implementation that step | ||||
| 	// is pushed out to the caller. The caller has to ensure that the pairValue | ||||
| 	// slices contain the rune of the opening bracket after normalization for | ||||
| 	// any opening or closing bracket. | ||||
|  | ||||
| 	openers *list.List // list of positions for opening brackets | ||||
|  | ||||
| 	// bracket pair positions sorted by location of opening bracket | ||||
| 	pairPositions bracketPairs | ||||
|  | ||||
| 	codesIsolatedRun []Class // directional bidi codes for an isolated run | ||||
| 	indexes          []int   // array of index values into the original string | ||||
|  | ||||
| } | ||||
|  | ||||
| // matchOpener reports whether characters at given positions form a matching | ||||
| // bracket pair. | ||||
| func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool { | ||||
| 	return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]] | ||||
| } | ||||
|  | ||||
| const maxPairingDepth = 63 | ||||
|  | ||||
| // locateBrackets locates matching bracket pairs according to BD16. | ||||
| // | ||||
| // This implementation uses a linked list instead of a stack, because, while | ||||
| // elements are added at the front (like a push) they are not generally removed | ||||
| // in atomic 'pop' operations, reducing the benefit of the stack archetype. | ||||
| func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) { | ||||
| 	// traverse the run | ||||
| 	// do that explicitly (not in a for-each) so we can record position | ||||
| 	for i, index := range p.indexes { | ||||
|  | ||||
| 		// look at the bracket type for each character | ||||
| 		if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON { | ||||
| 			// continue scanning | ||||
| 			continue | ||||
| 		} | ||||
| 		switch pairTypes[index] { | ||||
| 		case bpOpen: | ||||
| 			// check if maximum pairing depth reached | ||||
| 			if p.openers.Len() == maxPairingDepth { | ||||
| 				p.openers.Init() | ||||
| 				return | ||||
| 			} | ||||
| 			// remember opener location, most recent first | ||||
| 			p.openers.PushFront(i) | ||||
|  | ||||
| 		case bpClose: | ||||
| 			// see if there is a match | ||||
| 			count := 0 | ||||
| 			for elem := p.openers.Front(); elem != nil; elem = elem.Next() { | ||||
| 				count++ | ||||
| 				opener := elem.Value.(int) | ||||
| 				if p.matchOpener(pairValues, opener, i) { | ||||
| 					// if the opener matches, add nested pair to the ordered list | ||||
| 					p.pairPositions = append(p.pairPositions, bracketPair{opener, i}) | ||||
| 					// remove up to and including matched opener | ||||
| 					for ; count > 0; count-- { | ||||
| 						p.openers.Remove(p.openers.Front()) | ||||
| 					} | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 			sort.Sort(p.pairPositions) | ||||
| 			// if we get here, the closing bracket matched no openers | ||||
| 			// and gets ignored | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
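|  | ||||
| // Editor's note (illustration, not part of the vendored source): for the text | ||||
| // "a(b[c)d]", the closing ')' is matched against the earlier '(' even though | ||||
| // '[' is more recent on the openers list. The matched opener and everything | ||||
| // pushed after it, including '[', are removed, so the later ']' finds no | ||||
| // opener and is ignored. Only the pair '('...')' is recorded, as in the BD16 | ||||
| // examples of UAX #9. | ||||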
|  | ||||
| // Bracket pairs within an isolating run sequence are processed as units so | ||||
| // that both the opening and the closing paired bracket in a pair resolve to | ||||
| // the same direction. | ||||
| // | ||||
| // N0. Process bracket pairs in an isolating run sequence sequentially in | ||||
| // the logical order of the text positions of the opening paired brackets | ||||
| // using the logic given below. Within this scope, bidirectional types EN | ||||
| // and AN are treated as R. | ||||
| // | ||||
| // Identify the bracket pairs in the current isolating run sequence | ||||
| // according to BD16. For each bracket-pair element in the list of pairs of | ||||
| // text positions: | ||||
| // | ||||
| // a Inspect the bidirectional types of the characters enclosed within the | ||||
| // bracket pair. | ||||
| // | ||||
| // b If any strong type (either L or R) matching the embedding direction is | ||||
| // found, set the type for both brackets in the pair to match the embedding | ||||
| // direction. | ||||
| // | ||||
| // o [ e ] o -> o e e e o | ||||
| // | ||||
| // o [ o e ] -> o e o e e | ||||
| // | ||||
| // o [ NI e ] -> o e NI e e | ||||
| // | ||||
| // c Otherwise, if a strong type (opposite the embedding direction) is | ||||
| // found, test for adjacent strong types as follows: 1 First, check | ||||
| // backwards before the opening paired bracket until the first strong type | ||||
| // (L, R, or sos) is found. If that first preceding strong type is opposite | ||||
| // the embedding direction, then set the type for both brackets in the pair | ||||
| // to that type. 2 Otherwise, set the type for both brackets in the pair to | ||||
| // the embedding direction. | ||||
| // | ||||
| // o [ o ] e -> o o o o e | ||||
| // | ||||
| // o [ o NI ] o -> o o o NI o o | ||||
| // | ||||
| // e [ o ] o -> e e o e o | ||||
| // | ||||
| // e [ o ] e -> e e o e e | ||||
| // | ||||
| // e ( o [ o ] NI ) e -> e e o o o o NI e e | ||||
| // | ||||
| // d Otherwise, do not set the type for the current bracket pair. Note that | ||||
| // if the enclosed text contains no strong types the paired brackets will | ||||
| // both resolve to the same level when resolved individually using rules N1 | ||||
| // and N2. | ||||
| // | ||||
| // e ( NI ) o -> e ( NI ) o | ||||
|  | ||||
| // getStrongTypeN0 maps a character's directional code to a strong type as | ||||
| // required by rule N0. | ||||
| // | ||||
| // TODO: have separate type for "strong" directionality. | ||||
| func (p *bracketPairer) getStrongTypeN0(index int) Class { | ||||
| 	switch p.codesIsolatedRun[index] { | ||||
| 	// in the scope of N0, number types are treated as R | ||||
| 	case EN, AN, AL, R: | ||||
| 		return R | ||||
| 	case L: | ||||
| 		return L | ||||
| 	default: | ||||
| 		return ON | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // classifyPairContent reports the strong types contained inside a bracket pair, | ||||
| // assuming the given embedding direction. | ||||
| // | ||||
| // It returns ON if no strong type is found. If a strong type matching the | ||||
| // embedding direction is found, it returns the embedding direction. Otherwise | ||||
| // it returns the strong type opposite to the embedding direction. | ||||
| // | ||||
| // TODO: use separate type for "strong" directionality. | ||||
| func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class { | ||||
| 	dirOpposite := ON | ||||
| 	for i := loc.opener + 1; i < loc.closer; i++ { | ||||
| 		dir := p.getStrongTypeN0(i) | ||||
| 		if dir == ON { | ||||
| 			continue | ||||
| 		} | ||||
| 		if dir == dirEmbed { | ||||
| 			return dir // type matching embedding direction found | ||||
| 		} | ||||
| 		dirOpposite = dir | ||||
| 	} | ||||
| 	// return ON if no strong type found, or class opposite to dirEmbed | ||||
| 	return dirOpposite | ||||
| } | ||||
|  | ||||
| // classBeforePair determines which strong type, if any, precedes a bracket | ||||
| // pair. It returns R or L if a strong type is found, and sos otherwise. | ||||
| func (p *bracketPairer) classBeforePair(loc bracketPair) Class { | ||||
| 	for i := loc.opener - 1; i >= 0; i-- { | ||||
| 		if dir := p.getStrongTypeN0(i); dir != ON { | ||||
| 			return dir | ||||
| 		} | ||||
| 	} | ||||
| 	// no strong types found, return sos | ||||
| 	return p.sos | ||||
| } | ||||
|  | ||||
| // assignBracketType implements rule N0 for a single bracket pair. | ||||
| func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) { | ||||
| 	// rule "N0, a", inspect contents of pair | ||||
| 	dirPair := p.classifyPairContent(loc, dirEmbed) | ||||
|  | ||||
| 	// dirPair is now L, R, or N (no strong type found) | ||||
|  | ||||
| 	// the following logical tests are performed out of order compared to | ||||
| 	// the statement of the rules but yield the same results | ||||
| 	if dirPair == ON { | ||||
| 		return // case "d" - nothing to do | ||||
| 	} | ||||
|  | ||||
| 	if dirPair != dirEmbed { | ||||
| 		// case "c": strong type found, opposite - check before (c.1) | ||||
| 		dirPair = p.classBeforePair(loc) | ||||
| 		if dirPair == dirEmbed || dirPair == ON { | ||||
| 			// no strong opposite type found before - use embedding (c.2) | ||||
| 			dirPair = dirEmbed | ||||
| 		} | ||||
| 	} | ||||
| 	// else: case "b", strong type found matching embedding, | ||||
| 	// no explicit action needed, as dirPair is already set to embedding | ||||
| 	// direction | ||||
|  | ||||
| 	// set the bracket types to the type found | ||||
| 	p.setBracketsToType(loc, dirPair, initialTypes) | ||||
| } | ||||
|  | ||||
| func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) { | ||||
| 	p.codesIsolatedRun[loc.opener] = dirPair | ||||
| 	p.codesIsolatedRun[loc.closer] = dirPair | ||||
|  | ||||
| 	for i := loc.opener + 1; i < loc.closer; i++ { | ||||
| 		index := p.indexes[i] | ||||
| 		if initialTypes[index] != NSM { | ||||
| 			break | ||||
| 		} | ||||
| 		p.codesIsolatedRun[i] = dirPair | ||||
| 	} | ||||
|  | ||||
| 	for i := loc.closer + 1; i < len(p.indexes); i++ { | ||||
| 		index := p.indexes[i] | ||||
| 		if initialTypes[index] != NSM { | ||||
| 			break | ||||
| 		} | ||||
| 		p.codesIsolatedRun[i] = dirPair | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // resolveBrackets implements rule N0 for a list of pairs. | ||||
| func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) { | ||||
| 	for _, loc := range p.pairPositions { | ||||
| 		p.assignBracketType(loc, dirEmbed, initialTypes) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
1071  vendor/golang.org/x/text/unicode/bidi/core.go  (generated, vendored, new file; diff suppressed because it is too large)
206  vendor/golang.org/x/text/unicode/bidi/prop.go  (generated, vendored, new file)
							| @@ -0,0 +1,206 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package bidi | ||||
|  | ||||
| import "unicode/utf8" | ||||
|  | ||||
| // Properties provides access to BiDi properties of runes. | ||||
| type Properties struct { | ||||
| 	entry uint8 | ||||
| 	last  uint8 | ||||
| } | ||||
|  | ||||
| var trie = newBidiTrie(0) | ||||
|  | ||||
| // TODO: using this for bidirule reduces the running time by about 5%. Consider | ||||
| // if this is worth exposing or if we can find a way to speed up the Class | ||||
| // method. | ||||
| // | ||||
| // // CompactClass is like Class, but maps all of the BiDi control classes | ||||
| // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control. | ||||
| // func (p Properties) CompactClass() Class { | ||||
| // 	return Class(p.entry & 0x0F) | ||||
| // } | ||||
|  | ||||
| // Class returns the Bidi class for p. | ||||
| func (p Properties) Class() Class { | ||||
| 	c := Class(p.entry & 0x0F) | ||||
| 	if c == Control { | ||||
| 		c = controlByteToClass[p.last&0xF] | ||||
| 	} | ||||
| 	return c | ||||
| } | ||||
|  | ||||
| // IsBracket reports whether the rune is a bracket. | ||||
| func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 } | ||||
|  | ||||
| // IsOpeningBracket reports whether the rune is an opening bracket. It is only | ||||
| // meaningful if IsBracket returns true. | ||||
| func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 } | ||||
|  | ||||
| // TODO: find a better API and expose. | ||||
| func (p Properties) reverseBracket(r rune) rune { | ||||
| 	return xorMasks[p.entry>>xorMaskShift] ^ r | ||||
| } | ||||
|  | ||||
| var controlByteToClass = [16]Class{ | ||||
| 	0xD: LRO, // U+202D LeftToRightOverride, | ||||
| 	0xE: RLO, // U+202E RightToLeftOverride, | ||||
| 	0xA: LRE, // U+202A LeftToRightEmbedding, | ||||
| 	0xB: RLE, // U+202B RightToLeftEmbedding, | ||||
| 	0xC: PDF, // U+202C PopDirectionalFormat, | ||||
| 	0x6: LRI, // U+2066 LeftToRightIsolate, | ||||
| 	0x7: RLI, // U+2067 RightToLeftIsolate, | ||||
| 	0x8: FSI, // U+2068 FirstStrongIsolate, | ||||
| 	0x9: PDI, // U+2069 PopDirectionalIsolate, | ||||
| } | ||||
|  | ||||
| // LookupRune returns properties for r. | ||||
| func LookupRune(r rune) (p Properties, size int) { | ||||
| 	var buf [4]byte | ||||
| 	n := utf8.EncodeRune(buf[:], r) | ||||
| 	return Lookup(buf[:n]) | ||||
| } | ||||
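|  | ||||
| // Editor's note: a brief sketch, not part of the vendored source, of reading | ||||
| // per-rune properties with LookupRune: | ||||
| // | ||||
| //	props, _ := LookupRune('(') | ||||
| //	_ = props.IsBracket()        // true | ||||
| //	_ = props.IsOpeningBracket() // true | ||||
| //	props, _ = LookupRune('א') | ||||
| //	_ = props.Class() // R: Hebrew letters are right-to-left | ||||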
|  | ||||
| // TODO: these lookup methods are based on the generated trie code. The returned | ||||
| // sizes have slightly different semantics from the generated code, in that it | ||||
| // always returns size==1 for an illegal UTF-8 byte (instead of the length | ||||
| // of the maximum invalid subsequence). Most Transformers, like unicode/norm, | ||||
| // leave invalid UTF-8 untouched, in which case it has performance benefits to | ||||
| // do so (without changing the semantics). Bidi requires the semantics used here | ||||
| // for the bidirule implementation to be compatible with the Go semantics. | ||||
| // They ultimately should perhaps be adopted by all trie implementations, for | ||||
| // convenience's sake. | ||||
| // This unrolled code also boosts performance of the secure/bidirule package by | ||||
| // about 30%. | ||||
| // So, to remove this code: | ||||
| //   - add option to trie generator to define return type. | ||||
| //   - always return 1 byte size for ill-formed UTF-8 runes. | ||||
|  | ||||
| // Lookup returns properties for the first rune in s and the width in bytes of | ||||
| // its encoding. The size will be 0 if s does not hold enough bytes to complete | ||||
| // the encoding. | ||||
| func Lookup(s []byte) (p Properties, sz int) { | ||||
| 	c0 := s[0] | ||||
| 	switch { | ||||
| 	case c0 < 0x80: // is ASCII | ||||
| 		return Properties{entry: bidiValues[c0]}, 1 | ||||
| 	case c0 < 0xC2: | ||||
| 		return Properties{}, 1 | ||||
| 	case c0 < 0xE0: // 2-byte UTF-8 | ||||
| 		if len(s) < 2 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2 | ||||
| 	case c0 < 0xF0: // 3-byte UTF-8 | ||||
| 		if len(s) < 3 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3 | ||||
| 	case c0 < 0xF8: // 4-byte UTF-8 | ||||
| 		if len(s) < 4 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o = uint32(i)<<6 + uint32(c2) | ||||
| 		i = bidiIndex[o] | ||||
| 		c3 := s[3] | ||||
| 		if c3 < 0x80 || 0xC0 <= c3 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4 | ||||
| 	} | ||||
| 	// Illegal rune | ||||
| 	return Properties{}, 1 | ||||
| } | ||||
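|  | ||||
| // Editor's note: a sketch, not part of the vendored source, of scanning a | ||||
| // byte slice with Lookup and advancing by the reported size. countRTL is a | ||||
| // hypothetical helper; a size of 0 means the slice ends in an incomplete | ||||
| // encoding, while ill-formed bytes report a size of 1, so the loop always | ||||
| // makes progress except at a truncated tail. | ||||
| // | ||||
| //	func countRTL(b []byte) int { | ||||
| //		n := 0 | ||||
| //		for i := 0; i < len(b); { | ||||
| //			props, sz := Lookup(b[i:]) | ||||
| //			if sz == 0 { | ||||
| //				break // incomplete rune at the end of b | ||||
| //			} | ||||
| //			if c := props.Class(); c == R || c == AL { | ||||
| //				n++ | ||||
| //			} | ||||
| //			i += sz | ||||
| //		} | ||||
| //		return n | ||||
| //	} | ||||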
|  | ||||
| // LookupString returns properties for the first rune in s and the width in | ||||
| // bytes of its encoding. The size will be 0 if s does not hold enough bytes to | ||||
| // complete the encoding. | ||||
| func LookupString(s string) (p Properties, sz int) { | ||||
| 	c0 := s[0] | ||||
| 	switch { | ||||
| 	case c0 < 0x80: // is ASCII | ||||
| 		return Properties{entry: bidiValues[c0]}, 1 | ||||
| 	case c0 < 0xC2: | ||||
| 		return Properties{}, 1 | ||||
| 	case c0 < 0xE0: // 2-byte UTF-8 | ||||
| 		if len(s) < 2 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2 | ||||
| 	case c0 < 0xF0: // 3-byte UTF-8 | ||||
| 		if len(s) < 3 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3 | ||||
| 	case c0 < 0xF8: // 4-byte UTF-8 | ||||
| 		if len(s) < 4 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o = uint32(i)<<6 + uint32(c2) | ||||
| 		i = bidiIndex[o] | ||||
| 		c3 := s[3] | ||||
| 		if c3 < 0x80 || 0xC0 <= c3 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4 | ||||
| 	} | ||||
| 	// Illegal rune | ||||
| 	return Properties{}, 1 | ||||
| } | ||||
							
								
								
									
1815  vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go  (generated, vendored, new file; diff suppressed because it is too large)
1887  vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go  (generated, vendored, new file; diff suppressed because it is too large)
1923  vendor/golang.org/x/text/unicode/bidi/tables12.0.0.go  (generated, vendored, new file; diff suppressed because it is too large)
1955  vendor/golang.org/x/text/unicode/bidi/tables13.0.0.go  (generated, vendored, new file; diff suppressed because it is too large)
1781  vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go  (generated, vendored, new file; diff suppressed because it is too large)
60  vendor/golang.org/x/text/unicode/bidi/trieval.go  (generated, vendored, new file)
							| @@ -0,0 +1,60 @@ | ||||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | ||||
|  | ||||
| package bidi | ||||
|  | ||||
| // Class is the Unicode BiDi class. Each rune has a single class. | ||||
| type Class uint | ||||
|  | ||||
| const ( | ||||
| 	L       Class = iota // LeftToRight | ||||
| 	R                    // RightToLeft | ||||
| 	EN                   // EuropeanNumber | ||||
| 	ES                   // EuropeanSeparator | ||||
| 	ET                   // EuropeanTerminator | ||||
| 	AN                   // ArabicNumber | ||||
| 	CS                   // CommonSeparator | ||||
| 	B                    // ParagraphSeparator | ||||
| 	S                    // SegmentSeparator | ||||
| 	WS                   // WhiteSpace | ||||
| 	ON                   // OtherNeutral | ||||
| 	BN                   // BoundaryNeutral | ||||
| 	NSM                  // NonspacingMark | ||||
| 	AL                   // ArabicLetter | ||||
| 	Control              // Control LRO - PDI | ||||
|  | ||||
| 	numClass | ||||
|  | ||||
| 	LRO // LeftToRightOverride | ||||
| 	RLO // RightToLeftOverride | ||||
| 	LRE // LeftToRightEmbedding | ||||
| 	RLE // RightToLeftEmbedding | ||||
| 	PDF // PopDirectionalFormat | ||||
| 	LRI // LeftToRightIsolate | ||||
| 	RLI // RightToLeftIsolate | ||||
| 	FSI // FirstStrongIsolate | ||||
| 	PDI // PopDirectionalIsolate | ||||
|  | ||||
| 	unknownClass = ^Class(0) | ||||
| ) | ||||
|  | ||||
| var controlToClass = map[rune]Class{ | ||||
| 	0x202D: LRO, // LeftToRightOverride, | ||||
| 	0x202E: RLO, // RightToLeftOverride, | ||||
| 	0x202A: LRE, // LeftToRightEmbedding, | ||||
| 	0x202B: RLE, // RightToLeftEmbedding, | ||||
| 	0x202C: PDF, // PopDirectionalFormat, | ||||
| 	0x2066: LRI, // LeftToRightIsolate, | ||||
| 	0x2067: RLI, // RightToLeftIsolate, | ||||
| 	0x2068: FSI, // FirstStrongIsolate, | ||||
| 	0x2069: PDI, // PopDirectionalIsolate, | ||||
| } | ||||
|  | ||||
| // A trie entry has the following bits: | ||||
| // 7..5  XOR mask for brackets | ||||
| // 4     1: Bracket open, 0: Bracket close | ||||
| // 3..0  Class type | ||||
|  | ||||
| const ( | ||||
| 	openMask     = 0x10 | ||||
| 	xorMaskShift = 5 | ||||
| ) | ||||
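|  | ||||
| // Editor's note (illustration, not part of the vendored source): for an | ||||
| // opening bracket such as '(' (U+0028), the trie entry carries the class in | ||||
| // bits 3..0, has the openMask bit (0x10) set, and stores in bits 7..5 an index | ||||
| // into the generated xorMasks table such that | ||||
| // xorMasks[entry>>xorMaskShift] ^ '(' yields the matching ')' (see | ||||
| // Properties.reverseBracket in prop.go). | ||||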
							
								
								
									
512  vendor/golang.org/x/text/unicode/norm/composition.go  (generated, vendored, new file)
							| @@ -0,0 +1,512 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "unicode/utf8" | ||||
|  | ||||
| const ( | ||||
| 	maxNonStarters = 30 | ||||
| 	// The maximum number of characters needed for a buffer is | ||||
| 	// maxNonStarters + 1 for the starter + 1 for the CGJ | ||||
| 	maxBufferSize    = maxNonStarters + 2 | ||||
| 	maxNFCExpansion  = 3  // NFC(0x1D160) | ||||
| 	maxNFKCExpansion = 18 // NFKC(0xFDFA) | ||||
|  | ||||
| 	maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128 | ||||
| ) | ||||
|  | ||||
| // ssState is used for reporting the segment state after inserting a rune. | ||||
| // It is returned by streamSafe.next. | ||||
| type ssState int | ||||
|  | ||||
| const ( | ||||
| 	// Indicates a rune was successfully added to the segment. | ||||
| 	ssSuccess ssState = iota | ||||
| 	// Indicates a rune starts a new segment and should not be added. | ||||
| 	ssStarter | ||||
| 	// Indicates a rune caused a segment overflow and a CGJ should be inserted. | ||||
| 	ssOverflow | ||||
| ) | ||||
|  | ||||
| // streamSafe implements the policy of when a CGJ should be inserted. | ||||
| type streamSafe uint8 | ||||
|  | ||||
| // first inserts the first rune of a segment. It is a faster version of next if | ||||
| // it is known p represents the first rune in a segment. | ||||
| func (ss *streamSafe) first(p Properties) { | ||||
| 	*ss = streamSafe(p.nTrailingNonStarters()) | ||||
| } | ||||
|  | ||||
| // next returns a ssState value to indicate whether a rune represented by p | ||||
| // can be inserted. | ||||
| func (ss *streamSafe) next(p Properties) ssState { | ||||
| 	if *ss > maxNonStarters { | ||||
| 		panic("streamSafe was not reset") | ||||
| 	} | ||||
| 	n := p.nLeadingNonStarters() | ||||
| 	if *ss += streamSafe(n); *ss > maxNonStarters { | ||||
| 		*ss = 0 | ||||
| 		return ssOverflow | ||||
| 	} | ||||
| 	// The Stream-Safe Text Processing prescribes that the counting can stop | ||||
| 	// as soon as a starter is encountered. However, there are some starters, | ||||
| 	// like Jamo V and T, that can combine with other runes, leaving their | ||||
| 	// successive non-starters appended to the previous, possibly causing an | ||||
| 	// overflow. We will therefore consider any rune with a non-zero nLead to | ||||
| 	// be a non-starter. Note that it always hold that if nLead > 0 then | ||||
| 	// be a non-starter. Note that it always holds that if nLead > 0 then | ||||
| 	if n == 0 { | ||||
| 		*ss = streamSafe(p.nTrailingNonStarters()) | ||||
| 		return ssStarter | ||||
| 	} | ||||
| 	return ssSuccess | ||||
| } | ||||
|  | ||||
| // backwards is used for checking for overflow and segment starts | ||||
| // when traversing a string backwards. Users do not need to call first | ||||
| // for the first rune. The state of the streamSafe retains the count of | ||||
| // the non-starters loaded. | ||||
| func (ss *streamSafe) backwards(p Properties) ssState { | ||||
| 	if *ss > maxNonStarters { | ||||
| 		panic("streamSafe was not reset") | ||||
| 	} | ||||
| 	c := *ss + streamSafe(p.nTrailingNonStarters()) | ||||
| 	if c > maxNonStarters { | ||||
| 		return ssOverflow | ||||
| 	} | ||||
| 	*ss = c | ||||
| 	if p.nLeadingNonStarters() == 0 { | ||||
| 		return ssStarter | ||||
| 	} | ||||
| 	return ssSuccess | ||||
| } | ||||
|  | ||||
| func (ss streamSafe) isMax() bool { | ||||
| 	return ss == maxNonStarters | ||||
| } | ||||
|  | ||||
| // GraphemeJoiner is inserted after maxNonStarters non-starter runes. | ||||
| const GraphemeJoiner = "\u034F" | ||||
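|  | ||||
| // Editor's note (illustration, not part of the vendored source): with | ||||
| // maxNonStarters = 30, a starter followed by more than 30 non-starters is | ||||
| // split by inserting GraphemeJoiner after the 30th non-starter, as prescribed | ||||
| // by the Stream-Safe Text Format of UAX #15. | ||||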
|  | ||||
| // reorderBuffer is used to normalize a single segment.  Characters inserted with | ||||
| // insert are decomposed and reordered based on CCC. The compose method can | ||||
| // be used to recombine characters.  Note that the byte buffer does not hold | ||||
| // the UTF-8 characters in order.  Only the rune array is maintained in sorted | ||||
| // order. flush writes the resulting segment to a byte array. | ||||
| type reorderBuffer struct { | ||||
| 	rune  [maxBufferSize]Properties // Per character info. | ||||
| 	byte  [maxByteBufferSize]byte   // UTF-8 buffer. Referenced by runeInfo.pos. | ||||
| 	nbyte uint8                     // Number or bytes. | ||||
| 	ss    streamSafe                // For limiting length of non-starter sequence. | ||||
| 	nrune int                       // Number of runeInfos. | ||||
| 	f     formInfo | ||||
|  | ||||
| 	src      input | ||||
| 	nsrc     int | ||||
| 	tmpBytes input | ||||
|  | ||||
| 	out    []byte | ||||
| 	flushF func(*reorderBuffer) bool | ||||
| } | ||||
|  | ||||
| func (rb *reorderBuffer) init(f Form, src []byte) { | ||||
| 	rb.f = *formTable[f] | ||||
| 	rb.src.setBytes(src) | ||||
| 	rb.nsrc = len(src) | ||||
| 	rb.ss = 0 | ||||
| } | ||||
|  | ||||
| func (rb *reorderBuffer) initString(f Form, src string) { | ||||
| 	rb.f = *formTable[f] | ||||
| 	rb.src.setString(src) | ||||
| 	rb.nsrc = len(src) | ||||
| 	rb.ss = 0 | ||||
| } | ||||
|  | ||||
| func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) { | ||||
| 	rb.out = out | ||||
| 	rb.flushF = f | ||||
| } | ||||
|  | ||||
| // reset discards all characters from the buffer. | ||||
| func (rb *reorderBuffer) reset() { | ||||
| 	rb.nrune = 0 | ||||
| 	rb.nbyte = 0 | ||||
| } | ||||
|  | ||||
| func (rb *reorderBuffer) doFlush() bool { | ||||
| 	if rb.f.composing { | ||||
| 		rb.compose() | ||||
| 	} | ||||
| 	res := rb.flushF(rb) | ||||
| 	rb.reset() | ||||
| 	return res | ||||
| } | ||||
|  | ||||
| // appendFlush appends the normalized segment to rb.out. | ||||
| func appendFlush(rb *reorderBuffer) bool { | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		start := rb.rune[i].pos | ||||
| 		end := start + rb.rune[i].size | ||||
| 		rb.out = append(rb.out, rb.byte[start:end]...) | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // flush appends the normalized segment to out and resets rb. | ||||
| func (rb *reorderBuffer) flush(out []byte) []byte { | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		start := rb.rune[i].pos | ||||
| 		end := start + rb.rune[i].size | ||||
| 		out = append(out, rb.byte[start:end]...) | ||||
| 	} | ||||
| 	rb.reset() | ||||
| 	return out | ||||
| } | ||||
|  | ||||
| // flushCopy copies the normalized segment to buf and resets rb. | ||||
| // It returns the number of bytes written to buf. | ||||
| func (rb *reorderBuffer) flushCopy(buf []byte) int { | ||||
| 	p := 0 | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		runep := rb.rune[i] | ||||
| 		p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size]) | ||||
| 	} | ||||
| 	rb.reset() | ||||
| 	return p | ||||
| } | ||||
|  | ||||
| // insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class. | ||||
| // The caller must ensure the buffer has room for the rune (it does not report | ||||
| // failure). It is used internally by insert and insertString only. | ||||
| func (rb *reorderBuffer) insertOrdered(info Properties) { | ||||
| 	n := rb.nrune | ||||
| 	b := rb.rune[:] | ||||
| 	cc := info.ccc | ||||
| 	if cc > 0 { | ||||
| 		// Find insertion position + move elements to make room. | ||||
| 		for ; n > 0; n-- { | ||||
| 			if b[n-1].ccc <= cc { | ||||
| 				break | ||||
| 			} | ||||
| 			b[n] = b[n-1] | ||||
| 		} | ||||
| 	} | ||||
| 	rb.nrune += 1 | ||||
| 	pos := uint8(rb.nbyte) | ||||
| 	rb.nbyte += utf8.UTFMax | ||||
| 	info.pos = pos | ||||
| 	b[n] = info | ||||
| } | ||||
|  | ||||
| // insertErr is an error code returned by insert. Using this type instead | ||||
| // of error improves performance up to 20% for many of the benchmarks. | ||||
| type insertErr int | ||||
|  | ||||
| const ( | ||||
| 	iSuccess insertErr = -iota | ||||
| 	iShortDst | ||||
| 	iShortSrc | ||||
| ) | ||||
|  | ||||
| // insertFlush inserts the given rune in the buffer ordered by CCC. | ||||
| // If a decomposition with multiple segments is encountered, the leading | ||||
| // segments are flushed. | ||||
| // It returns a non-zero error code if the rune was not inserted. | ||||
| func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr { | ||||
| 	if rune := src.hangul(i); rune != 0 { | ||||
| 		rb.decomposeHangul(rune) | ||||
| 		return iSuccess | ||||
| 	} | ||||
| 	if info.hasDecomposition() { | ||||
| 		return rb.insertDecomposed(info.Decomposition()) | ||||
| 	} | ||||
| 	rb.insertSingle(src, i, info) | ||||
| 	return iSuccess | ||||
| } | ||||
|  | ||||
| // insertUnsafe inserts the given rune in the buffer ordered by CCC. | ||||
| // It is assumed there is sufficient space to hold the runes. It is the | ||||
| // responsibility of the caller to ensure this. This can be done by checking | ||||
| // the state returned by the streamSafe type. | ||||
| func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) { | ||||
| 	if rune := src.hangul(i); rune != 0 { | ||||
| 		rb.decomposeHangul(rune) | ||||
| 	} | ||||
| 	if info.hasDecomposition() { | ||||
| 		// TODO: inline. | ||||
| 		rb.insertDecomposed(info.Decomposition()) | ||||
| 	} else { | ||||
| 		rb.insertSingle(src, i, info) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // insertDecomposed inserts an entry into the reorderBuffer for each rune | ||||
| // in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes. | ||||
| // It flushes the buffer on each new segment start. | ||||
| func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr { | ||||
| 	rb.tmpBytes.setBytes(dcomp) | ||||
| 	// As the streamSafe accounting already handles the counting for modifiers, | ||||
| 	// we don't have to call next. However, we do need to keep the accounting | ||||
| 	// intact when flushing the buffer. | ||||
| 	for i := 0; i < len(dcomp); { | ||||
| 		info := rb.f.info(rb.tmpBytes, i) | ||||
| 		if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() { | ||||
| 			return iShortDst | ||||
| 		} | ||||
| 		i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)]) | ||||
| 		rb.insertOrdered(info) | ||||
| 	} | ||||
| 	return iSuccess | ||||
| } | ||||
|  | ||||
| // insertSingle inserts an entry in the reorderBuffer for the rune at | ||||
| // position i. info is the runeInfo for the rune at position i. | ||||
| func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) { | ||||
| 	src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size)) | ||||
| 	rb.insertOrdered(info) | ||||
| } | ||||
|  | ||||
| // insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb. | ||||
| func (rb *reorderBuffer) insertCGJ() { | ||||
| 	rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))}) | ||||
| } | ||||
|  | ||||
| // appendRune inserts a rune at the end of the buffer. It is used for Hangul. | ||||
| func (rb *reorderBuffer) appendRune(r rune) { | ||||
| 	bn := rb.nbyte | ||||
| 	sz := utf8.EncodeRune(rb.byte[bn:], rune(r)) | ||||
| 	rb.nbyte += utf8.UTFMax | ||||
| 	rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)} | ||||
| 	rb.nrune++ | ||||
| } | ||||
|  | ||||
| // assignRune sets a rune at position pos. It is used for Hangul and recomposition. | ||||
| func (rb *reorderBuffer) assignRune(pos int, r rune) { | ||||
| 	bn := rb.rune[pos].pos | ||||
| 	sz := utf8.EncodeRune(rb.byte[bn:], rune(r)) | ||||
| 	rb.rune[pos] = Properties{pos: bn, size: uint8(sz)} | ||||
| } | ||||
|  | ||||
| // runeAt returns the rune at position n. It is used for Hangul and recomposition. | ||||
| func (rb *reorderBuffer) runeAt(n int) rune { | ||||
| 	inf := rb.rune[n] | ||||
| 	r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size]) | ||||
| 	return r | ||||
| } | ||||
|  | ||||
| // bytesAt returns the UTF-8 encoding of the rune at position n. | ||||
| // It is used for Hangul and recomposition. | ||||
| func (rb *reorderBuffer) bytesAt(n int) []byte { | ||||
| 	inf := rb.rune[n] | ||||
| 	return rb.byte[inf.pos : int(inf.pos)+int(inf.size)] | ||||
| } | ||||
|  | ||||
| // For Hangul we combine algorithmically, instead of using tables. | ||||
| const ( | ||||
| 	hangulBase  = 0xAC00 // UTF-8(hangulBase) -> EA B0 80 | ||||
| 	hangulBase0 = 0xEA | ||||
| 	hangulBase1 = 0xB0 | ||||
| 	hangulBase2 = 0x80 | ||||
|  | ||||
| 	hangulEnd  = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4 | ||||
| 	hangulEnd0 = 0xED | ||||
| 	hangulEnd1 = 0x9E | ||||
| 	hangulEnd2 = 0xA4 | ||||
|  | ||||
| 	jamoLBase  = 0x1100 // UTF-8(jamoLBase) -> E1 84 00 | ||||
| 	jamoLBase0 = 0xE1 | ||||
| 	jamoLBase1 = 0x84 | ||||
| 	jamoLEnd   = 0x1113 | ||||
| 	jamoVBase  = 0x1161 | ||||
| 	jamoVEnd   = 0x1176 | ||||
| 	jamoTBase  = 0x11A7 | ||||
| 	jamoTEnd   = 0x11C3 | ||||
|  | ||||
| 	jamoTCount   = 28 | ||||
| 	jamoVCount   = 21 | ||||
| 	jamoVTCount  = 21 * 28 | ||||
| 	jamoLVTCount = 19 * 21 * 28 | ||||
| ) | ||||
|  | ||||
| const hangulUTF8Size = 3 | ||||
|  | ||||
| func isHangul(b []byte) bool { | ||||
| 	if len(b) < hangulUTF8Size { | ||||
| 		return false | ||||
| 	} | ||||
| 	b0 := b[0] | ||||
| 	if b0 < hangulBase0 { | ||||
| 		return false | ||||
| 	} | ||||
| 	b1 := b[1] | ||||
| 	switch { | ||||
| 	case b0 == hangulBase0: | ||||
| 		return b1 >= hangulBase1 | ||||
| 	case b0 < hangulEnd0: | ||||
| 		return true | ||||
| 	case b0 > hangulEnd0: | ||||
| 		return false | ||||
| 	case b1 < hangulEnd1: | ||||
| 		return true | ||||
| 	} | ||||
| 	return b1 == hangulEnd1 && b[2] < hangulEnd2 | ||||
| } | ||||
|  | ||||
| func isHangulString(b string) bool { | ||||
| 	if len(b) < hangulUTF8Size { | ||||
| 		return false | ||||
| 	} | ||||
| 	b0 := b[0] | ||||
| 	if b0 < hangulBase0 { | ||||
| 		return false | ||||
| 	} | ||||
| 	b1 := b[1] | ||||
| 	switch { | ||||
| 	case b0 == hangulBase0: | ||||
| 		return b1 >= hangulBase1 | ||||
| 	case b0 < hangulEnd0: | ||||
| 		return true | ||||
| 	case b0 > hangulEnd0: | ||||
| 		return false | ||||
| 	case b1 < hangulEnd1: | ||||
| 		return true | ||||
| 	} | ||||
| 	return b1 == hangulEnd1 && b[2] < hangulEnd2 | ||||
| } | ||||
|  | ||||
| // Caller must ensure len(b) >= 2. | ||||
| func isJamoVT(b []byte) bool { | ||||
| 	// True if (rune & 0xff00) == jamoLBase | ||||
| 	return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1 | ||||
| } | ||||
|  | ||||
| func isHangulWithoutJamoT(b []byte) bool { | ||||
| 	c, _ := utf8.DecodeRune(b) | ||||
| 	c -= hangulBase | ||||
| 	return c < jamoLVTCount && c%jamoTCount == 0 | ||||
| } | ||||
|  | ||||
| // decomposeHangul writes the decomposed Hangul to buf and returns the number | ||||
| // of bytes written.  len(buf) should be at least 9. | ||||
| func decomposeHangul(buf []byte, r rune) int { | ||||
| 	const JamoUTF8Len = 3 | ||||
| 	r -= hangulBase | ||||
| 	x := r % jamoTCount | ||||
| 	r /= jamoTCount | ||||
| 	utf8.EncodeRune(buf, jamoLBase+r/jamoVCount) | ||||
| 	utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount) | ||||
| 	if x != 0 { | ||||
| 		utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x) | ||||
| 		return 3 * JamoUTF8Len | ||||
| 	} | ||||
| 	return 2 * JamoUTF8Len | ||||
| } | ||||
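|  | ||||
| // Editor's note (worked example, not part of the vendored source): for | ||||
| // r = U+D55C ('한'), r-hangulBase = 10588; 10588 % 28 = 4 (T index), | ||||
| // 10588 / 28 = 378, 378 % 21 = 0 (V index), 378 / 21 = 18 (L index). The | ||||
| // decomposition is therefore U+1112 U+1161 U+11AB, and 9 bytes are written. | ||||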
|  | ||||
| // decomposeHangul algorithmically decomposes a Hangul rune into | ||||
| // its Jamo components. | ||||
| // See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul. | ||||
| func (rb *reorderBuffer) decomposeHangul(r rune) { | ||||
| 	r -= hangulBase | ||||
| 	x := r % jamoTCount | ||||
| 	r /= jamoTCount | ||||
| 	rb.appendRune(jamoLBase + r/jamoVCount) | ||||
| 	rb.appendRune(jamoVBase + r%jamoVCount) | ||||
| 	if x != 0 { | ||||
| 		rb.appendRune(jamoTBase + x) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // combineHangul algorithmically combines Jamo character components into Hangul. | ||||
| // See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul. | ||||
| func (rb *reorderBuffer) combineHangul(s, i, k int) { | ||||
| 	b := rb.rune[:] | ||||
| 	bn := rb.nrune | ||||
| 	for ; i < bn; i++ { | ||||
| 		cccB := b[k-1].ccc | ||||
| 		cccC := b[i].ccc | ||||
| 		if cccB == 0 { | ||||
| 			s = k - 1 | ||||
| 		} | ||||
| 		if s != k-1 && cccB >= cccC { | ||||
| 			// b[i] is blocked by greater-equal cccX below it | ||||
| 			b[k] = b[i] | ||||
| 			k++ | ||||
| 		} else { | ||||
| 			l := rb.runeAt(s) // also used to compare to hangulBase | ||||
| 			v := rb.runeAt(i) // also used to compare to jamoT | ||||
| 			switch { | ||||
| 			case jamoLBase <= l && l < jamoLEnd && | ||||
| 				jamoVBase <= v && v < jamoVEnd: | ||||
| 				// 11xx plus 116x to LV | ||||
| 				rb.assignRune(s, hangulBase+ | ||||
| 					(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount) | ||||
| 			case hangulBase <= l && l < hangulEnd && | ||||
| 				jamoTBase < v && v < jamoTEnd && | ||||
| 				((l-hangulBase)%jamoTCount) == 0: | ||||
| 				// ACxx plus 11Ax to LVT | ||||
| 				rb.assignRune(s, l+v-jamoTBase) | ||||
| 			default: | ||||
| 				b[k] = b[i] | ||||
| 				k++ | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	rb.nrune = k | ||||
| } | ||||
|  | ||||
| // compose recombines the runes in the buffer. | ||||
| // It should only be used to recompose a single segment, as it will not | ||||
| // handle alternations between Hangul and non-Hangul characters correctly. | ||||
| func (rb *reorderBuffer) compose() { | ||||
| 	// Lazily load the map used by the combine func below, but do | ||||
| 	// it outside of the loop. | ||||
| 	recompMapOnce.Do(buildRecompMap) | ||||
|  | ||||
| 	// UAX #15, section X5, including Corrigendum #5 | ||||
| 	// "In any character sequence beginning with starter S, a character C is | ||||
| 	//  blocked from S if and only if there is some character B between S | ||||
| 	//  and C, and either B is a starter or it has the same or higher | ||||
| 	//  combining class as C." | ||||
| 	bn := rb.nrune | ||||
| 	if bn == 0 { | ||||
| 		return | ||||
| 	} | ||||
| 	k := 1 | ||||
| 	b := rb.rune[:] | ||||
| 	for s, i := 0, 1; i < bn; i++ { | ||||
| 		if isJamoVT(rb.bytesAt(i)) { | ||||
| 			// Redo from start in Hangul mode. Necessary to support | ||||
| 			// U+320E..U+321E in NFKC mode. | ||||
| 			rb.combineHangul(s, i, k) | ||||
| 			return | ||||
| 		} | ||||
| 		ii := b[i] | ||||
| 		// We can only use combineForward as a filter if we later | ||||
| 		// get the info for the combined character. This is more | ||||
| 		// expensive than using the filter. Using combinesBackward() | ||||
| 		// is safe. | ||||
| 		if ii.combinesBackward() { | ||||
| 			cccB := b[k-1].ccc | ||||
| 			cccC := ii.ccc | ||||
| 			blocked := false // b[i] blocked by starter or greater or equal CCC? | ||||
| 			if cccB == 0 { | ||||
| 				s = k - 1 | ||||
| 			} else { | ||||
| 				blocked = s != k-1 && cccB >= cccC | ||||
| 			} | ||||
| 			if !blocked { | ||||
| 				combined := combine(rb.runeAt(s), rb.runeAt(i)) | ||||
| 				if combined != 0 { | ||||
| 					rb.assignRune(s, combined) | ||||
| 					continue | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		b[k] = b[i] | ||||
| 		k++ | ||||
| 	} | ||||
| 	rb.nrune = k | ||||
| } | ||||
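|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // It shows the effect of recomposition through the public API: a decomposed | ||||
| // "e" + U+0301 recombines to U+00E9, and a lower combining class mark such | ||||
| // as U+0323 is reordered before the acute and composes with the base first. | ||||
| // (The second result assumes the standard Unicode composition data.) | ||||
| func exampleRecompose() (string, string) { | ||||
| 	return NFC.String("e\u0301"), NFC.String("e\u0301\u0323") // "\u00e9", "\u1eb9\u0301" | ||||
| } | ||||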
							
								
								
									
278 vendor/golang.org/x/text/unicode/norm/forminfo.go generated vendored Normal file
							| @@ -0,0 +1,278 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "encoding/binary" | ||||
|  | ||||
| // This file contains Form-specific logic and wrappers for data in tables.go. | ||||
|  | ||||
| // Rune info is stored in a separate trie per composing form. A composing form | ||||
| // and its corresponding decomposing form share the same trie.  Each trie maps | ||||
| // a rune to a uint16. The values take two forms.  For v >= 0x8000: | ||||
| //   bits | ||||
| //   15:    1 (inverse of NFD_QC bit of qcInfo) | ||||
| //   13..7: qcInfo (see below). isYesD is always true (no decomposition). | ||||
| //    6..0: ccc (compressed CCC value). | ||||
| // For v < 0x8000, the respective rune has a decomposition and v is an index | ||||
| // into a byte array of UTF-8 decomposition sequences and additional info and | ||||
| // has the form: | ||||
| //    <header> <decomp_byte>* [<tccc> [<lccc>]] | ||||
| // The header contains the number of bytes in the decomposition (excluding this | ||||
| // length byte). The two most significant bits of this length byte correspond | ||||
| // to bit 5 and 4 of qcInfo (see below).  The byte sequence itself starts at v+1. | ||||
| // The byte sequence is followed by a trailing and leading CCC if the values | ||||
| // for these are not zero.  The value of v determines which ccc are appended | ||||
| // to the sequences.  For v < firstCCC, there are none, for v >= firstCCC, | ||||
| // the sequence is followed by a trailing ccc, and for v >= firstLeadingCC | ||||
| // there is an additional leading ccc. The value of tccc itself is the | ||||
| // trailing CCC shifted left 2 bits. The two least-significant bits of tccc | ||||
| // are the number of trailing non-starters. | ||||
|  | ||||
| const ( | ||||
| 	qcInfoMask      = 0x3F // to clear all but the relevant bits in a qcInfo | ||||
| 	headerLenMask   = 0x3F // extract the length value from the header byte | ||||
| 	headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte | ||||
| ) | ||||
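|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // For an index v < 0x8000, the header byte at decomps[v] splits along the | ||||
| // masks above: the low bits hold the length of the UTF-8 decomposition, | ||||
| // which itself starts at decomps[v+1], and the two high bits carry qcInfo. | ||||
| func exampleDecompHeader(v uint16) (length, flags byte) { | ||||
| 	h := decomps[v] | ||||
| 	return h & headerLenMask, h & headerFlagsMask | ||||
| } | ||||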
|  | ||||
| // Properties provides access to normalization properties of a rune. | ||||
| type Properties struct { | ||||
| 	pos   uint8  // start position in reorderBuffer; used in composition.go | ||||
| 	size  uint8  // length of UTF-8 encoding of this rune | ||||
| 	ccc   uint8  // leading canonical combining class (ccc if not decomposition) | ||||
| 	tccc  uint8  // trailing canonical combining class (ccc if not decomposition) | ||||
| 	nLead uint8  // number of leading non-starters. | ||||
| 	flags qcInfo // quick check flags | ||||
| 	index uint16 | ||||
| } | ||||
|  | ||||
| // functions dispatchable per form | ||||
| type lookupFunc func(b input, i int) Properties | ||||
|  | ||||
| // formInfo holds Form-specific functions and tables. | ||||
| type formInfo struct { | ||||
| 	form                     Form | ||||
| 	composing, compatibility bool // form type | ||||
| 	info                     lookupFunc | ||||
| 	nextMain                 iterFunc | ||||
| } | ||||
|  | ||||
| var formTable = []*formInfo{{ | ||||
| 	form:          NFC, | ||||
| 	composing:     true, | ||||
| 	compatibility: false, | ||||
| 	info:          lookupInfoNFC, | ||||
| 	nextMain:      nextComposed, | ||||
| }, { | ||||
| 	form:          NFD, | ||||
| 	composing:     false, | ||||
| 	compatibility: false, | ||||
| 	info:          lookupInfoNFC, | ||||
| 	nextMain:      nextDecomposed, | ||||
| }, { | ||||
| 	form:          NFKC, | ||||
| 	composing:     true, | ||||
| 	compatibility: true, | ||||
| 	info:          lookupInfoNFKC, | ||||
| 	nextMain:      nextComposed, | ||||
| }, { | ||||
| 	form:          NFKD, | ||||
| 	composing:     false, | ||||
| 	compatibility: true, | ||||
| 	info:          lookupInfoNFKC, | ||||
| 	nextMain:      nextDecomposed, | ||||
| }} | ||||
|  | ||||
| // We do not distinguish between boundaries for NFC, NFD, etc. to avoid | ||||
| // unexpected behavior for the user.  For example, in NFD, there is a boundary | ||||
| // after 'a'.  However, 'a' might combine with modifiers, so from the application's | ||||
| // perspective it is not a good boundary. We will therefore always use the | ||||
| // boundaries for the combining variants. | ||||
|  | ||||
| // BoundaryBefore returns true if this rune starts a new segment and | ||||
| // cannot combine with any rune on the left. | ||||
| func (p Properties) BoundaryBefore() bool { | ||||
| 	if p.ccc == 0 && !p.combinesBackward() { | ||||
| 		return true | ||||
| 	} | ||||
| 	// We assume that the CCC of the first character in a decomposition | ||||
| 	// is always non-zero if different from info.ccc and that we can return | ||||
| 	// false at this point. This is verified by maketables. | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| // BoundaryAfter returns true if runes cannot combine with or otherwise | ||||
| // interact with this or previous runes. | ||||
| func (p Properties) BoundaryAfter() bool { | ||||
| 	// TODO: loosen these conditions. | ||||
| 	return p.isInert() | ||||
| } | ||||
|  | ||||
| // We pack quick check data in 6 bits: | ||||
| //   5:    Combines forward  (0 == false, 1 == true) | ||||
| //   4..3: NFC_QC Yes(00), No (10), or Maybe (11) | ||||
| //   2:    NFD_QC Yes (0) or No (1). No also means there is a decomposition. | ||||
| //   1..0: Number of trailing non-starters. | ||||
| // | ||||
| // When all 6 bits are zero, the character is inert, meaning it is never | ||||
| // influenced by normalization. | ||||
| type qcInfo uint8 | ||||
|  | ||||
| func (p Properties) isYesC() bool { return p.flags&0x10 == 0 } | ||||
| func (p Properties) isYesD() bool { return p.flags&0x4 == 0 } | ||||
|  | ||||
| func (p Properties) combinesForward() bool  { return p.flags&0x20 != 0 } | ||||
| func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe | ||||
| func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD | ||||
|  | ||||
| func (p Properties) isInert() bool { | ||||
| 	return p.flags&qcInfoMask == 0 && p.ccc == 0 | ||||
| } | ||||
|  | ||||
| func (p Properties) multiSegment() bool { | ||||
| 	return p.index >= firstMulti && p.index < endMulti | ||||
| } | ||||
|  | ||||
| func (p Properties) nLeadingNonStarters() uint8 { | ||||
| 	return p.nLead | ||||
| } | ||||
|  | ||||
| func (p Properties) nTrailingNonStarters() uint8 { | ||||
| 	return uint8(p.flags & 0x03) | ||||
| } | ||||
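|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // A hand-built Properties value exercises the accessors above: flags 0x38 | ||||
| // sets bit 5 (combines forward) and bits 4..3 to 11 (NFC_QC Maybe), while | ||||
| // bit 2 stays clear (NFD_QC Yes, hence no decomposition). | ||||
| func exampleQCInfo() (forward, backward, hasDecomp bool) { | ||||
| 	p := Properties{flags: qcInfo(0x38)} | ||||
| 	return p.combinesForward(), p.combinesBackward(), p.hasDecomposition() | ||||
| } | ||||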
|  | ||||
| // Decomposition returns the decomposition for the underlying rune | ||||
| // or nil if there is none. | ||||
| func (p Properties) Decomposition() []byte { | ||||
| 	// TODO: create the decomposition for Hangul? | ||||
| 	if p.index == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	i := p.index | ||||
| 	n := decomps[i] & headerLenMask | ||||
| 	i++ | ||||
| 	return decomps[i : i+uint16(n)] | ||||
| } | ||||
|  | ||||
| // Size returns the length of UTF-8 encoding of the rune. | ||||
| func (p Properties) Size() int { | ||||
| 	return int(p.size) | ||||
| } | ||||
|  | ||||
| // CCC returns the canonical combining class of the underlying rune. | ||||
| func (p Properties) CCC() uint8 { | ||||
| 	if p.index >= firstCCCZeroExcept { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	return ccc[p.ccc] | ||||
| } | ||||
|  | ||||
| // LeadCCC returns the CCC of the first rune in the decomposition. | ||||
| // If there is no decomposition, LeadCCC equals CCC. | ||||
| func (p Properties) LeadCCC() uint8 { | ||||
| 	return ccc[p.ccc] | ||||
| } | ||||
|  | ||||
| // TrailCCC returns the CCC of the last rune in the decomposition. | ||||
| // If there is no decomposition, TrailCCC equals CCC. | ||||
| func (p Properties) TrailCCC() uint8 { | ||||
| 	return ccc[p.tccc] | ||||
| } | ||||
|  | ||||
| func buildRecompMap() { | ||||
| 	recompMap = make(map[uint32]rune, len(recompMapPacked)/8) | ||||
| 	var buf [8]byte | ||||
| 	for i := 0; i < len(recompMapPacked); i += 8 { | ||||
| 		copy(buf[:], recompMapPacked[i:i+8]) | ||||
| 		key := binary.BigEndian.Uint32(buf[:4]) | ||||
| 		val := binary.BigEndian.Uint32(buf[4:]) | ||||
| 		recompMap[key] = rune(val) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Recomposition | ||||
| // We use 32-bit keys instead of 64-bit for the two codepoint keys. | ||||
| // This clips off the bits of three entries, but we know this will not | ||||
| // result in a collision. In the unlikely event that changes to | ||||
| // UnicodeData.txt introduce collisions, the compiler will catch it. | ||||
| // Note that the recomposition map for NFC and NFKC are identical. | ||||
|  | ||||
| // combine returns the combined rune or 0 if it doesn't exist. | ||||
| // | ||||
| // The caller is responsible for calling | ||||
| // recompMapOnce.Do(buildRecompMap) sometime before this is called. | ||||
| func combine(a, b rune) rune { | ||||
| 	key := uint32(uint16(a))<<16 + uint32(uint16(b)) | ||||
| 	if recompMap == nil { | ||||
| 		panic("caller error") // see func comment | ||||
| 	} | ||||
| 	return recompMap[key] | ||||
| } | ||||
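|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // Once the map has been built, combine recomposes canonical pairs; 'e' | ||||
| // (U+0065) followed by U+0301 yields U+00E9 per the Unicode data. | ||||
| func exampleCombine() rune { | ||||
| 	recompMapOnce.Do(buildRecompMap) | ||||
| 	return combine('e', 0x0301) // 0x00E9 | ||||
| } | ||||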
|  | ||||
| func lookupInfoNFC(b input, i int) Properties { | ||||
| 	v, sz := b.charinfoNFC(i) | ||||
| 	return compInfo(v, sz) | ||||
| } | ||||
|  | ||||
| func lookupInfoNFKC(b input, i int) Properties { | ||||
| 	v, sz := b.charinfoNFKC(i) | ||||
| 	return compInfo(v, sz) | ||||
| } | ||||
|  | ||||
| // Properties returns properties for the first rune in s. | ||||
| func (f Form) Properties(s []byte) Properties { | ||||
| 	if f == NFC || f == NFD { | ||||
| 		return compInfo(nfcData.lookup(s)) | ||||
| 	} | ||||
| 	return compInfo(nfkcData.lookup(s)) | ||||
| } | ||||
|  | ||||
| // PropertiesString returns properties for the first rune in s. | ||||
| func (f Form) PropertiesString(s string) Properties { | ||||
| 	if f == NFC || f == NFD { | ||||
| 		return compInfo(nfcData.lookupString(s)) | ||||
| 	} | ||||
| 	return compInfo(nfkcData.lookupString(s)) | ||||
| } | ||||
|  | ||||
| // compInfo converts the information contained in v and sz | ||||
| // to a Properties.  See the comment at the top of the file | ||||
| // for more information on the format. | ||||
| func compInfo(v uint16, sz int) Properties { | ||||
| 	if v == 0 { | ||||
| 		return Properties{size: uint8(sz)} | ||||
| 	} else if v >= 0x8000 { | ||||
| 		p := Properties{ | ||||
| 			size:  uint8(sz), | ||||
| 			ccc:   uint8(v), | ||||
| 			tccc:  uint8(v), | ||||
| 			flags: qcInfo(v >> 8), | ||||
| 		} | ||||
| 		if p.ccc > 0 || p.combinesBackward() { | ||||
| 			p.nLead = uint8(p.flags & 0x3) | ||||
| 		} | ||||
| 		return p | ||||
| 	} | ||||
| 	// has decomposition | ||||
| 	h := decomps[v] | ||||
| 	f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4 | ||||
| 	p := Properties{size: uint8(sz), flags: f, index: v} | ||||
| 	if v >= firstCCC { | ||||
| 		v += uint16(h&headerLenMask) + 1 | ||||
| 		c := decomps[v] | ||||
| 		p.tccc = c >> 2 | ||||
| 		p.flags |= qcInfo(c & 0x3) | ||||
| 		if v >= firstLeadingCCC { | ||||
| 			p.nLead = c & 0x3 | ||||
| 			if v >= firstStarterWithNLead { | ||||
| 				// We were tricked. Remove the decomposition. | ||||
| 				p.flags &= 0x03 | ||||
| 				p.index = 0 | ||||
| 				return p | ||||
| 			} | ||||
| 			p.ccc = decomps[v+1] | ||||
| 		} | ||||
| 	} | ||||
| 	return p | ||||
| } | ||||
							
								
								
									
109 vendor/golang.org/x/text/unicode/norm/input.go generated vendored Normal file
							| @@ -0,0 +1,109 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "unicode/utf8" | ||||
|  | ||||
| type input struct { | ||||
| 	str   string | ||||
| 	bytes []byte | ||||
| } | ||||
|  | ||||
| func inputBytes(str []byte) input { | ||||
| 	return input{bytes: str} | ||||
| } | ||||
|  | ||||
| func inputString(str string) input { | ||||
| 	return input{str: str} | ||||
| } | ||||
|  | ||||
| func (in *input) setBytes(str []byte) { | ||||
| 	in.str = "" | ||||
| 	in.bytes = str | ||||
| } | ||||
|  | ||||
| func (in *input) setString(str string) { | ||||
| 	in.str = str | ||||
| 	in.bytes = nil | ||||
| } | ||||
|  | ||||
| func (in *input) _byte(p int) byte { | ||||
| 	if in.bytes == nil { | ||||
| 		return in.str[p] | ||||
| 	} | ||||
| 	return in.bytes[p] | ||||
| } | ||||
|  | ||||
| func (in *input) skipASCII(p, max int) int { | ||||
| 	if in.bytes == nil { | ||||
| 		for ; p < max && in.str[p] < utf8.RuneSelf; p++ { | ||||
| 		} | ||||
| 	} else { | ||||
| 		for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ { | ||||
| 		} | ||||
| 	} | ||||
| 	return p | ||||
| } | ||||
|  | ||||
| func (in *input) skipContinuationBytes(p int) int { | ||||
| 	if in.bytes == nil { | ||||
| 		for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ { | ||||
| 		} | ||||
| 	} else { | ||||
| 		for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ { | ||||
| 		} | ||||
| 	} | ||||
| 	return p | ||||
| } | ||||
|  | ||||
| func (in *input) appendSlice(buf []byte, b, e int) []byte { | ||||
| 	if in.bytes != nil { | ||||
| 		return append(buf, in.bytes[b:e]...) | ||||
| 	} | ||||
| 	for i := b; i < e; i++ { | ||||
| 		buf = append(buf, in.str[i]) | ||||
| 	} | ||||
| 	return buf | ||||
| } | ||||
|  | ||||
| func (in *input) copySlice(buf []byte, b, e int) int { | ||||
| 	if in.bytes == nil { | ||||
| 		return copy(buf, in.str[b:e]) | ||||
| 	} | ||||
| 	return copy(buf, in.bytes[b:e]) | ||||
| } | ||||
|  | ||||
| func (in *input) charinfoNFC(p int) (uint16, int) { | ||||
| 	if in.bytes == nil { | ||||
| 		return nfcData.lookupString(in.str[p:]) | ||||
| 	} | ||||
| 	return nfcData.lookup(in.bytes[p:]) | ||||
| } | ||||
|  | ||||
| func (in *input) charinfoNFKC(p int) (uint16, int) { | ||||
| 	if in.bytes == nil { | ||||
| 		return nfkcData.lookupString(in.str[p:]) | ||||
| 	} | ||||
| 	return nfkcData.lookup(in.bytes[p:]) | ||||
| } | ||||
|  | ||||
| func (in *input) hangul(p int) (r rune) { | ||||
| 	var size int | ||||
| 	if in.bytes == nil { | ||||
| 		if !isHangulString(in.str[p:]) { | ||||
| 			return 0 | ||||
| 		} | ||||
| 		r, size = utf8.DecodeRuneInString(in.str[p:]) | ||||
| 	} else { | ||||
| 		if !isHangul(in.bytes[p:]) { | ||||
| 			return 0 | ||||
| 		} | ||||
| 		r, size = utf8.DecodeRune(in.bytes[p:]) | ||||
| 	} | ||||
| 	if size != hangulUTF8Size { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	return r | ||||
| } | ||||
							
								
								
									
458 vendor/golang.org/x/text/unicode/norm/iter.go generated vendored Normal file
							| @@ -0,0 +1,458 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| // MaxSegmentSize is the maximum size of a byte buffer needed to consider any | ||||
| // sequence of starter and non-starter runes for the purpose of normalization. | ||||
| const MaxSegmentSize = maxByteBufferSize | ||||
|  | ||||
| // An Iter iterates over a string or byte slice, while normalizing it | ||||
| // to a given Form. | ||||
| type Iter struct { | ||||
| 	rb     reorderBuffer | ||||
| 	buf    [maxByteBufferSize]byte | ||||
| 	info   Properties // first character saved from previous iteration | ||||
| 	next   iterFunc   // implementation of next depends on form | ||||
| 	asciiF iterFunc | ||||
|  | ||||
| 	p        int    // current position in input source | ||||
| 	multiSeg []byte // remainder of multi-segment decomposition | ||||
| } | ||||
|  | ||||
| type iterFunc func(*Iter) []byte | ||||
|  | ||||
| // Init initializes i to iterate over src after normalizing it to Form f. | ||||
| func (i *Iter) Init(f Form, src []byte) { | ||||
| 	i.p = 0 | ||||
| 	if len(src) == 0 { | ||||
| 		i.setDone() | ||||
| 		i.rb.nsrc = 0 | ||||
| 		return | ||||
| 	} | ||||
| 	i.multiSeg = nil | ||||
| 	i.rb.init(f, src) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	i.asciiF = nextASCIIBytes | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| } | ||||
|  | ||||
| // InitString initializes i to iterate over src after normalizing it to Form f. | ||||
| func (i *Iter) InitString(f Form, src string) { | ||||
| 	i.p = 0 | ||||
| 	if len(src) == 0 { | ||||
| 		i.setDone() | ||||
| 		i.rb.nsrc = 0 | ||||
| 		return | ||||
| 	} | ||||
| 	i.multiSeg = nil | ||||
| 	i.rb.initString(f, src) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	i.asciiF = nextASCIIString | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| } | ||||
|  | ||||
| // Seek sets the segment to be returned by the next call to Next to start | ||||
| // at the given offset.  It is the responsibility of the caller to ensure | ||||
| // that the offset is the start of a segment. | ||||
| func (i *Iter) Seek(offset int64, whence int) (int64, error) { | ||||
| 	var abs int64 | ||||
| 	switch whence { | ||||
| 	case 0: | ||||
| 		abs = offset | ||||
| 	case 1: | ||||
| 		abs = int64(i.p) + offset | ||||
| 	case 2: | ||||
| 		abs = int64(i.rb.nsrc) + offset | ||||
| 	default: | ||||
| 		return 0, fmt.Errorf("norm: invalid whence") | ||||
| 	} | ||||
| 	if abs < 0 { | ||||
| 		return 0, fmt.Errorf("norm: negative position") | ||||
| 	} | ||||
| 	if int(abs) >= i.rb.nsrc { | ||||
| 		i.setDone() | ||||
| 		return int64(i.p), nil | ||||
| 	} | ||||
| 	i.p = int(abs) | ||||
| 	i.multiSeg = nil | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	return abs, nil | ||||
| } | ||||
|  | ||||
| // returnSlice returns a slice of the underlying input as a byte slice. | ||||
| // If the underlying input is of type []byte, it simply returns a subslice. | ||||
| // If it is of type string, it copies the slice into i.buf and returns that. | ||||
| func (i *Iter) returnSlice(a, b int) []byte { | ||||
| 	if i.rb.src.bytes == nil { | ||||
| 		return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])] | ||||
| 	} | ||||
| 	return i.rb.src.bytes[a:b] | ||||
| } | ||||
|  | ||||
| // Pos returns the byte position at which the next call to Next will commence processing. | ||||
| func (i *Iter) Pos() int { | ||||
| 	return i.p | ||||
| } | ||||
|  | ||||
| func (i *Iter) setDone() { | ||||
| 	i.next = nextDone | ||||
| 	i.p = i.rb.nsrc | ||||
| } | ||||
|  | ||||
| // Done returns true if there is no more input to process. | ||||
| func (i *Iter) Done() bool { | ||||
| 	return i.p >= i.rb.nsrc | ||||
| } | ||||
|  | ||||
| // Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input. | ||||
| // For any input a and b for which f(a) == f(b), subsequent calls | ||||
| // to Next will return the same segments. | ||||
| // Modifying runes are grouped together with the preceding starter, if such a starter exists. | ||||
| // Although not guaranteed, n will typically be the smallest possible n. | ||||
| func (i *Iter) Next() []byte { | ||||
| 	return i.next(i) | ||||
| } | ||||
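|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // A typical use of Iter is to walk the input one normalized segment at a | ||||
| // time without materializing the whole result at once. | ||||
| func exampleIterSegments(s string) []string { | ||||
| 	var it Iter | ||||
| 	it.InitString(NFC, s) | ||||
| 	var segs []string | ||||
| 	for !it.Done() { | ||||
| 		segs = append(segs, string(it.Next())) | ||||
| 	} | ||||
| 	return segs | ||||
| } | ||||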
|  | ||||
| func nextASCIIBytes(i *Iter) []byte { | ||||
| 	p := i.p + 1 | ||||
| 	if p >= i.rb.nsrc { | ||||
| 		p0 := i.p | ||||
| 		i.setDone() | ||||
| 		return i.rb.src.bytes[p0:p] | ||||
| 	} | ||||
| 	if i.rb.src.bytes[p] < utf8.RuneSelf { | ||||
| 		p0 := i.p | ||||
| 		i.p = p | ||||
| 		return i.rb.src.bytes[p0:p] | ||||
| 	} | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	return i.next(i) | ||||
| } | ||||
|  | ||||
| func nextASCIIString(i *Iter) []byte { | ||||
| 	p := i.p + 1 | ||||
| 	if p >= i.rb.nsrc { | ||||
| 		i.buf[0] = i.rb.src.str[i.p] | ||||
| 		i.setDone() | ||||
| 		return i.buf[:1] | ||||
| 	} | ||||
| 	if i.rb.src.str[p] < utf8.RuneSelf { | ||||
| 		i.buf[0] = i.rb.src.str[i.p] | ||||
| 		i.p = p | ||||
| 		return i.buf[:1] | ||||
| 	} | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	return i.next(i) | ||||
| } | ||||
|  | ||||
| func nextHangul(i *Iter) []byte { | ||||
| 	p := i.p | ||||
| 	next := p + hangulUTF8Size | ||||
| 	if next >= i.rb.nsrc { | ||||
| 		i.setDone() | ||||
| 	} else if i.rb.src.hangul(next) == 0 { | ||||
| 		i.rb.ss.next(i.info) | ||||
| 		i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 		i.next = i.rb.f.nextMain | ||||
| 		return i.next(i) | ||||
| 	} | ||||
| 	i.p = next | ||||
| 	return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))] | ||||
| } | ||||
|  | ||||
| func nextDone(i *Iter) []byte { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // nextMulti is used for iterating over multi-segment decompositions | ||||
| // for decomposing normal forms. | ||||
| func nextMulti(i *Iter) []byte { | ||||
| 	j := 0 | ||||
| 	d := i.multiSeg | ||||
| 	// skip first rune | ||||
| 	for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ { | ||||
| 	} | ||||
| 	for j < len(d) { | ||||
| 		info := i.rb.f.info(input{bytes: d}, j) | ||||
| 		if info.BoundaryBefore() { | ||||
| 			i.multiSeg = d[j:] | ||||
| 			return d[:j] | ||||
| 		} | ||||
| 		j += int(info.size) | ||||
| 	} | ||||
| 	// treat last segment as normal decomposition | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	return i.next(i) | ||||
| } | ||||
|  | ||||
| // nextMultiNorm is used for iterating over multi-segment decompositions | ||||
| // for composing normal forms. | ||||
| func nextMultiNorm(i *Iter) []byte { | ||||
| 	j := 0 | ||||
| 	d := i.multiSeg | ||||
| 	for j < len(d) { | ||||
| 		info := i.rb.f.info(input{bytes: d}, j) | ||||
| 		if info.BoundaryBefore() { | ||||
| 			i.rb.compose() | ||||
| 			seg := i.buf[:i.rb.flushCopy(i.buf[:])] | ||||
| 			i.rb.insertUnsafe(input{bytes: d}, j, info) | ||||
| 			i.multiSeg = d[j+int(info.size):] | ||||
| 			return seg | ||||
| 		} | ||||
| 		i.rb.insertUnsafe(input{bytes: d}, j, info) | ||||
| 		j += int(info.size) | ||||
| 	} | ||||
| 	i.multiSeg = nil | ||||
| 	i.next = nextComposed | ||||
| 	return doNormComposed(i) | ||||
| } | ||||
|  | ||||
| // nextDecomposed is the implementation of Next for forms NFD and NFKD. | ||||
| func nextDecomposed(i *Iter) (next []byte) { | ||||
| 	outp := 0 | ||||
| 	inCopyStart, outCopyStart := i.p, 0 | ||||
| 	for { | ||||
| 		if sz := int(i.info.size); sz <= 1 { | ||||
| 			i.rb.ss = 0 | ||||
| 			p := i.p | ||||
| 			i.p++ // ASCII or illegal byte.  Either way, advance by 1. | ||||
| 			if i.p >= i.rb.nsrc { | ||||
| 				i.setDone() | ||||
| 				return i.returnSlice(p, i.p) | ||||
| 			} else if i.rb.src._byte(i.p) < utf8.RuneSelf { | ||||
| 				i.next = i.asciiF | ||||
| 				return i.returnSlice(p, i.p) | ||||
| 			} | ||||
| 			outp++ | ||||
| 		} else if d := i.info.Decomposition(); d != nil { | ||||
| 			// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero. | ||||
| 			// Case 1: there is a leftover to copy.  In this case the decomposition | ||||
| 			// must begin with a modifier and should always be appended. | ||||
| 			// Case 2: no leftover. Simply return d if followed by a ccc == 0 value. | ||||
| 			p := outp + len(d) | ||||
| 			if outp > 0 { | ||||
| 				i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) | ||||
| 				// TODO: this condition should not be possible, but we leave it | ||||
| 				// in for defensive purposes. | ||||
| 				if p > len(i.buf) { | ||||
| 					return i.buf[:outp] | ||||
| 				} | ||||
| 			} else if i.info.multiSegment() { | ||||
| 				// outp must be 0 as multi-segment decompositions always | ||||
| 				// start a new segment. | ||||
| 				if i.multiSeg == nil { | ||||
| 					i.multiSeg = d | ||||
| 					i.next = nextMulti | ||||
| 					return nextMulti(i) | ||||
| 				} | ||||
| 				// We are in the last segment.  Treat as normal decomposition. | ||||
| 				d = i.multiSeg | ||||
| 				i.multiSeg = nil | ||||
| 				p = len(d) | ||||
| 			} | ||||
| 			prevCC := i.info.tccc | ||||
| 			if i.p += sz; i.p >= i.rb.nsrc { | ||||
| 				i.setDone() | ||||
| 				i.info = Properties{} // Force BoundaryBefore to succeed. | ||||
| 			} else { | ||||
| 				i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 			} | ||||
| 			switch i.rb.ss.next(i.info) { | ||||
| 			case ssOverflow: | ||||
| 				i.next = nextCGJDecompose | ||||
| 				fallthrough | ||||
| 			case ssStarter: | ||||
| 				if outp > 0 { | ||||
| 					copy(i.buf[outp:], d) | ||||
| 					return i.buf[:p] | ||||
| 				} | ||||
| 				return d | ||||
| 			} | ||||
| 			copy(i.buf[outp:], d) | ||||
| 			outp = p | ||||
| 			inCopyStart, outCopyStart = i.p, outp | ||||
| 			if i.info.ccc < prevCC { | ||||
| 				goto doNorm | ||||
| 			} | ||||
| 			continue | ||||
| 		} else if r := i.rb.src.hangul(i.p); r != 0 { | ||||
| 			outp = decomposeHangul(i.buf[:], r) | ||||
| 			i.p += hangulUTF8Size | ||||
| 			inCopyStart, outCopyStart = i.p, outp | ||||
| 			if i.p >= i.rb.nsrc { | ||||
| 				i.setDone() | ||||
| 				break | ||||
| 			} else if i.rb.src.hangul(i.p) != 0 { | ||||
| 				i.next = nextHangul | ||||
| 				return i.buf[:outp] | ||||
| 			} | ||||
| 		} else { | ||||
| 			p := outp + sz | ||||
| 			if p > len(i.buf) { | ||||
| 				break | ||||
| 			} | ||||
| 			outp = p | ||||
| 			i.p += sz | ||||
| 		} | ||||
| 		if i.p >= i.rb.nsrc { | ||||
| 			i.setDone() | ||||
| 			break | ||||
| 		} | ||||
| 		prevCC := i.info.tccc | ||||
| 		i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 		if v := i.rb.ss.next(i.info); v == ssStarter { | ||||
| 			break | ||||
| 		} else if v == ssOverflow { | ||||
| 			i.next = nextCGJDecompose | ||||
| 			break | ||||
| 		} | ||||
| 		if i.info.ccc < prevCC { | ||||
| 			goto doNorm | ||||
| 		} | ||||
| 	} | ||||
| 	if outCopyStart == 0 { | ||||
| 		return i.returnSlice(inCopyStart, i.p) | ||||
| 	} else if inCopyStart < i.p { | ||||
| 		i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) | ||||
| 	} | ||||
| 	return i.buf[:outp] | ||||
| doNorm: | ||||
| 	// Insert what we have decomposed so far in the reorderBuffer. | ||||
| 	// As we will only reorder, there will always be enough room. | ||||
| 	i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) | ||||
| 	i.rb.insertDecomposed(i.buf[0:outp]) | ||||
| 	return doNormDecomposed(i) | ||||
| } | ||||
|  | ||||
| func doNormDecomposed(i *Iter) []byte { | ||||
| 	for { | ||||
| 		i.rb.insertUnsafe(i.rb.src, i.p, i.info) | ||||
| 		if i.p += int(i.info.size); i.p >= i.rb.nsrc { | ||||
| 			i.setDone() | ||||
| 			break | ||||
| 		} | ||||
| 		i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 		if i.info.ccc == 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		if s := i.rb.ss.next(i.info); s == ssOverflow { | ||||
| 			i.next = nextCGJDecompose | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	// new segment or too many combining characters: exit normalization | ||||
| 	return i.buf[:i.rb.flushCopy(i.buf[:])] | ||||
| } | ||||
|  | ||||
| func nextCGJDecompose(i *Iter) []byte { | ||||
| 	i.rb.ss = 0 | ||||
| 	i.rb.insertCGJ() | ||||
| 	i.next = nextDecomposed | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	buf := doNormDecomposed(i) | ||||
| 	return buf | ||||
| } | ||||
|  | ||||
| // nextComposed is the implementation of Next for forms NFC and NFKC. | ||||
| func nextComposed(i *Iter) []byte { | ||||
| 	outp, startp := 0, i.p | ||||
| 	var prevCC uint8 | ||||
| 	for { | ||||
| 		if !i.info.isYesC() { | ||||
| 			goto doNorm | ||||
| 		} | ||||
| 		prevCC = i.info.tccc | ||||
| 		sz := int(i.info.size) | ||||
| 		if sz == 0 { | ||||
| 			sz = 1 // illegal rune: copy byte-by-byte | ||||
| 		} | ||||
| 		p := outp + sz | ||||
| 		if p > len(i.buf) { | ||||
| 			break | ||||
| 		} | ||||
| 		outp = p | ||||
| 		i.p += sz | ||||
| 		if i.p >= i.rb.nsrc { | ||||
| 			i.setDone() | ||||
| 			break | ||||
| 		} else if i.rb.src._byte(i.p) < utf8.RuneSelf { | ||||
| 			i.rb.ss = 0 | ||||
| 			i.next = i.asciiF | ||||
| 			break | ||||
| 		} | ||||
| 		i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 		if v := i.rb.ss.next(i.info); v == ssStarter { | ||||
| 			break | ||||
| 		} else if v == ssOverflow { | ||||
| 			i.next = nextCGJCompose | ||||
| 			break | ||||
| 		} | ||||
| 		if i.info.ccc < prevCC { | ||||
| 			goto doNorm | ||||
| 		} | ||||
| 	} | ||||
| 	return i.returnSlice(startp, i.p) | ||||
| doNorm: | ||||
| 	// reset to start position | ||||
| 	i.p = startp | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	if i.info.multiSegment() { | ||||
| 		d := i.info.Decomposition() | ||||
| 		info := i.rb.f.info(input{bytes: d}, 0) | ||||
| 		i.rb.insertUnsafe(input{bytes: d}, 0, info) | ||||
| 		i.multiSeg = d[int(info.size):] | ||||
| 		i.next = nextMultiNorm | ||||
| 		return nextMultiNorm(i) | ||||
| 	} | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	i.rb.insertUnsafe(i.rb.src, i.p, i.info) | ||||
| 	return doNormComposed(i) | ||||
| } | ||||
|  | ||||
| func doNormComposed(i *Iter) []byte { | ||||
| 	// First rune should already be inserted. | ||||
| 	for { | ||||
| 		if i.p += int(i.info.size); i.p >= i.rb.nsrc { | ||||
| 			i.setDone() | ||||
| 			break | ||||
| 		} | ||||
| 		i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 		if s := i.rb.ss.next(i.info); s == ssStarter { | ||||
| 			break | ||||
| 		} else if s == ssOverflow { | ||||
| 			i.next = nextCGJCompose | ||||
| 			break | ||||
| 		} | ||||
| 		i.rb.insertUnsafe(i.rb.src, i.p, i.info) | ||||
| 	} | ||||
| 	i.rb.compose() | ||||
| 	seg := i.buf[:i.rb.flushCopy(i.buf[:])] | ||||
| 	return seg | ||||
| } | ||||
|  | ||||
| func nextCGJCompose(i *Iter) []byte { | ||||
| 	i.rb.ss = 0 // instead of first | ||||
| 	i.rb.insertCGJ() | ||||
| 	i.next = nextComposed | ||||
| 	// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter, | ||||
| 	// even if they are not. This is particularly dubious for U+FF9E and U+FF9F. | ||||
| 	// If we ever change that, insert a check here. | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	i.rb.insertUnsafe(i.rb.src, i.p, i.info) | ||||
| 	return doNormComposed(i) | ||||
| } | ||||
							
								
								
									
609 vendor/golang.org/x/text/unicode/norm/normalize.go generated vendored Normal file
							| @@ -0,0 +1,609 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| // Note: the file data_test.go that is generated should not be checked in. | ||||
| //go:generate go run maketables.go triegen.go | ||||
| //go:generate go test -tags test | ||||
|  | ||||
| // Package norm contains types and functions for normalizing Unicode strings. | ||||
| package norm // import "golang.org/x/text/unicode/norm" | ||||
|  | ||||
| import ( | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"golang.org/x/text/transform" | ||||
| ) | ||||
|  | ||||
| // A Form denotes a canonical representation of Unicode code points. | ||||
| // The Unicode-defined normalization and equivalence forms are: | ||||
| // | ||||
| //   NFC   Unicode Normalization Form C | ||||
| //   NFD   Unicode Normalization Form D | ||||
| //   NFKC  Unicode Normalization Form KC | ||||
| //   NFKD  Unicode Normalization Form KD | ||||
| // | ||||
| // For a Form f, this documentation uses the notation f(x) to mean | ||||
| // the bytes or string x converted to the given form. | ||||
| // A position n in x is called a boundary if conversion to the form can | ||||
| // proceed independently on both sides: | ||||
| //   f(x) == append(f(x[0:n]), f(x[n:])...) | ||||
| // | ||||
| // References: https://unicode.org/reports/tr15/ and | ||||
| // https://unicode.org/notes/tn5/. | ||||
| type Form int | ||||
|  | ||||
| const ( | ||||
| 	NFC Form = iota | ||||
| 	NFD | ||||
| 	NFKC | ||||
| 	NFKD | ||||
| ) | ||||
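|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // Two canonically equivalent spellings compare equal once both are | ||||
| // normalized to the same form. | ||||
| func exampleEquivalent() bool { | ||||
| 	composed := "\u00e9"    // "é" as a single code point | ||||
| 	decomposed := "e\u0301" // 'e' followed by a combining acute accent | ||||
| 	return NFC.String(composed) == NFC.String(decomposed) // true | ||||
| } | ||||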
|  | ||||
| // Bytes returns f(b). May return b if f(b) = b. | ||||
| func (f Form) Bytes(b []byte) []byte { | ||||
| 	src := inputBytes(b) | ||||
| 	ft := formTable[f] | ||||
| 	n, ok := ft.quickSpan(src, 0, len(b), true) | ||||
| 	if ok { | ||||
| 		return b | ||||
| 	} | ||||
| 	out := make([]byte, n, len(b)) | ||||
| 	copy(out, b[0:n]) | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush} | ||||
| 	return doAppendInner(&rb, n) | ||||
| } | ||||
|  | ||||
| // String returns f(s). | ||||
| func (f Form) String(s string) string { | ||||
| 	src := inputString(s) | ||||
| 	ft := formTable[f] | ||||
| 	n, ok := ft.quickSpan(src, 0, len(s), true) | ||||
| 	if ok { | ||||
| 		return s | ||||
| 	} | ||||
| 	out := make([]byte, n, len(s)) | ||||
| 	copy(out, s[0:n]) | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush} | ||||
| 	return string(doAppendInner(&rb, n)) | ||||
| } | ||||
|  | ||||
| // IsNormal returns true if b == f(b). | ||||
| func (f Form) IsNormal(b []byte) bool { | ||||
| 	src := inputBytes(b) | ||||
| 	ft := formTable[f] | ||||
| 	bp, ok := ft.quickSpan(src, 0, len(b), true) | ||||
| 	if ok { | ||||
| 		return true | ||||
| 	} | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)} | ||||
| 	rb.setFlusher(nil, cmpNormalBytes) | ||||
| 	for bp < len(b) { | ||||
| 		rb.out = b[bp:] | ||||
| 		if bp = decomposeSegment(&rb, bp, true); bp < 0 { | ||||
| 			return false | ||||
| 		} | ||||
| 		bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true) | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func cmpNormalBytes(rb *reorderBuffer) bool { | ||||
| 	b := rb.out | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		info := rb.rune[i] | ||||
| 		if int(info.size) > len(b) { | ||||
| 			return false | ||||
| 		} | ||||
| 		p := info.pos | ||||
| 		pe := p + info.size | ||||
| 		for ; p < pe; p++ { | ||||
| 			if b[0] != rb.byte[p] { | ||||
| 				return false | ||||
| 			} | ||||
| 			b = b[1:] | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // IsNormalString returns true if s == f(s). | ||||
| func (f Form) IsNormalString(s string) bool { | ||||
| 	src := inputString(s) | ||||
| 	ft := formTable[f] | ||||
| 	bp, ok := ft.quickSpan(src, 0, len(s), true) | ||||
| 	if ok { | ||||
| 		return true | ||||
| 	} | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)} | ||||
| 	rb.setFlusher(nil, func(rb *reorderBuffer) bool { | ||||
| 		for i := 0; i < rb.nrune; i++ { | ||||
| 			info := rb.rune[i] | ||||
| 			if bp+int(info.size) > len(s) { | ||||
| 				return false | ||||
| 			} | ||||
| 			p := info.pos | ||||
| 			pe := p + info.size | ||||
| 			for ; p < pe; p++ { | ||||
| 				if s[bp] != rb.byte[p] { | ||||
| 					return false | ||||
| 				} | ||||
| 				bp++ | ||||
| 			} | ||||
| 		} | ||||
| 		return true | ||||
| 	}) | ||||
| 	for bp < len(s) { | ||||
| 		if bp = decomposeSegment(&rb, bp, true); bp < 0 { | ||||
| 			return false | ||||
| 		} | ||||
| 		bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true) | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // patchTail fixes a case where a rune may be incorrectly normalized | ||||
| // if it is followed by illegal continuation bytes. It patches rb.out in | ||||
| // place and reports whether the tail may still need to be merged with | ||||
| // subsequent input. | ||||
| func patchTail(rb *reorderBuffer) bool { | ||||
| 	info, p := lastRuneStart(&rb.f, rb.out) | ||||
| 	if p == -1 || info.size == 0 { | ||||
| 		return true | ||||
| 	} | ||||
| 	end := p + int(info.size) | ||||
| 	extra := len(rb.out) - end | ||||
| 	if extra > 0 { | ||||
| 		// Potentially allocating memory. However, this only | ||||
| 		// happens with ill-formed UTF-8. | ||||
| 		x := make([]byte, 0) | ||||
| 		x = append(x, rb.out[len(rb.out)-extra:]...) | ||||
| 		rb.out = rb.out[:end] | ||||
| 		decomposeToLastBoundary(rb) | ||||
| 		rb.doFlush() | ||||
| 		rb.out = append(rb.out, x...) | ||||
| 		return false | ||||
| 	} | ||||
| 	buf := rb.out[p:] | ||||
| 	rb.out = rb.out[:p] | ||||
| 	decomposeToLastBoundary(rb) | ||||
| 	if s := rb.ss.next(info); s == ssStarter { | ||||
| 		rb.doFlush() | ||||
| 		rb.ss.first(info) | ||||
| 	} else if s == ssOverflow { | ||||
| 		rb.doFlush() | ||||
| 		rb.insertCGJ() | ||||
| 		rb.ss = 0 | ||||
| 	} | ||||
| 	rb.insertUnsafe(inputBytes(buf), 0, info) | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func appendQuick(rb *reorderBuffer, i int) int { | ||||
| 	if rb.nsrc == i { | ||||
| 		return i | ||||
| 	} | ||||
| 	end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true) | ||||
| 	rb.out = rb.src.appendSlice(rb.out, i, end) | ||||
| 	return end | ||||
| } | ||||
|  | ||||
| // Append returns f(append(out, b...)). | ||||
| // The buffer out must be nil, empty, or equal to f(out). | ||||
| func (f Form) Append(out []byte, src ...byte) []byte { | ||||
| 	return f.doAppend(out, inputBytes(src), len(src)) | ||||
| } | ||||
|  | ||||
| func (f Form) doAppend(out []byte, src input, n int) []byte { | ||||
| 	if n == 0 { | ||||
| 		return out | ||||
| 	} | ||||
| 	ft := formTable[f] | ||||
| 	// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer. | ||||
| 	if len(out) == 0 { | ||||
| 		p, _ := ft.quickSpan(src, 0, n, true) | ||||
| 		out = src.appendSlice(out, 0, p) | ||||
| 		if p == n { | ||||
| 			return out | ||||
| 		} | ||||
| 		rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush} | ||||
| 		return doAppendInner(&rb, p) | ||||
| 	} | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: n} | ||||
| 	return doAppend(&rb, out, 0) | ||||
| } | ||||
|  | ||||
| func doAppend(rb *reorderBuffer, out []byte, p int) []byte { | ||||
| 	rb.setFlusher(out, appendFlush) | ||||
| 	src, n := rb.src, rb.nsrc | ||||
| 	doMerge := len(out) > 0 | ||||
| 	if q := src.skipContinuationBytes(p); q > p { | ||||
| 		// Move leading non-starters to destination. | ||||
| 		rb.out = src.appendSlice(rb.out, p, q) | ||||
| 		p = q | ||||
| 		doMerge = patchTail(rb) | ||||
| 	} | ||||
| 	fd := &rb.f | ||||
| 	if doMerge { | ||||
| 		var info Properties | ||||
| 		if p < n { | ||||
| 			info = fd.info(src, p) | ||||
| 			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 { | ||||
| 				if p == 0 { | ||||
| 					decomposeToLastBoundary(rb) | ||||
| 				} | ||||
| 				p = decomposeSegment(rb, p, true) | ||||
| 			} | ||||
| 		} | ||||
| 		if info.size == 0 { | ||||
| 			rb.doFlush() | ||||
| 			// Append incomplete UTF-8 encoding. | ||||
| 			return src.appendSlice(rb.out, p, n) | ||||
| 		} | ||||
| 		if rb.nrune > 0 { | ||||
| 			return doAppendInner(rb, p) | ||||
| 		} | ||||
| 	} | ||||
| 	p = appendQuick(rb, p) | ||||
| 	return doAppendInner(rb, p) | ||||
| } | ||||
|  | ||||
| func doAppendInner(rb *reorderBuffer, p int) []byte { | ||||
| 	for n := rb.nsrc; p < n; { | ||||
| 		p = decomposeSegment(rb, p, true) | ||||
| 		p = appendQuick(rb, p) | ||||
| 	} | ||||
| 	return rb.out | ||||
| } | ||||
|  | ||||
| // AppendString returns f(append(out, []byte(s))). | ||||
| // The buffer out must be nil, empty, or equal to f(out). | ||||
| func (f Form) AppendString(out []byte, src string) []byte { | ||||
| 	return f.doAppend(out, inputString(src), len(src)) | ||||
| } | ||||
|  | ||||
| // QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]). | ||||
| // It is not guaranteed to return the largest such n. | ||||
| func (f Form) QuickSpan(b []byte) int { | ||||
| 	n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true) | ||||
| 	return n | ||||
| } | ||||
|  | ||||
| // Span implements transform.SpanningTransformer. It returns a boundary n such | ||||
| // that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n. | ||||
| func (f Form) Span(b []byte, atEOF bool) (n int, err error) { | ||||
| 	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF) | ||||
| 	if n < len(b) { | ||||
| 		if !ok { | ||||
| 			err = transform.ErrEndOfSpan | ||||
| 		} else { | ||||
| 			err = transform.ErrShortSrc | ||||
| 		} | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // SpanString returns a boundary n such that s[0:n] == f(s[0:n]). | ||||
| // It is not guaranteed to return the largest such n. | ||||
| func (f Form) SpanString(s string, atEOF bool) (n int, err error) { | ||||
| 	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF) | ||||
| 	if n < len(s) { | ||||
| 		if !ok { | ||||
| 			err = transform.ErrEndOfSpan | ||||
| 		} else { | ||||
| 			err = transform.ErrShortSrc | ||||
| 		} | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and | ||||
| // whether any non-normalized parts were found. If atEOF is false, n will | ||||
| // not point past the last segment if this segment might become | ||||
| // non-normalized by appending other runes. | ||||
| func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) { | ||||
| 	var lastCC uint8 | ||||
| 	ss := streamSafe(0) | ||||
| 	lastSegStart := i | ||||
| 	for n = end; i < n; { | ||||
| 		if j := src.skipASCII(i, n); i != j { | ||||
| 			i = j | ||||
| 			lastSegStart = i - 1 | ||||
| 			lastCC = 0 | ||||
| 			ss = 0 | ||||
| 			continue | ||||
| 		} | ||||
| 		info := f.info(src, i) | ||||
| 		if info.size == 0 { | ||||
| 			if atEOF { | ||||
| 				// include incomplete runes | ||||
| 				return n, true | ||||
| 			} | ||||
| 			return lastSegStart, true | ||||
| 		} | ||||
| 		// This block needs to be before the next, because it is possible to | ||||
| 		// have an overflow for runes that are starters (e.g. with U+FF9E). | ||||
| 		switch ss.next(info) { | ||||
| 		case ssStarter: | ||||
| 			lastSegStart = i | ||||
| 		case ssOverflow: | ||||
| 			return lastSegStart, false | ||||
| 		case ssSuccess: | ||||
| 			if lastCC > info.ccc { | ||||
| 				return lastSegStart, false | ||||
| 			} | ||||
| 		} | ||||
| 		if f.composing { | ||||
| 			if !info.isYesC() { | ||||
| 				break | ||||
| 			} | ||||
| 		} else { | ||||
| 			if !info.isYesD() { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		lastCC = info.ccc | ||||
| 		i += int(info.size) | ||||
| 	} | ||||
| 	if i == n { | ||||
| 		if !atEOF { | ||||
| 			n = lastSegStart | ||||
| 		} | ||||
| 		return n, true | ||||
| 	} | ||||
| 	return lastSegStart, false | ||||
| } | ||||
|  | ||||
| // QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]). | ||||
| // It is not guaranteed to return the largest such n. | ||||
| func (f Form) QuickSpanString(s string) int { | ||||
| 	n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true) | ||||
| 	return n | ||||
| } | ||||
|  | ||||
| // FirstBoundary returns the position i of the first boundary in b | ||||
| // or -1 if b contains no boundary. | ||||
| func (f Form) FirstBoundary(b []byte) int { | ||||
| 	return f.firstBoundary(inputBytes(b), len(b)) | ||||
| } | ||||
|  | ||||
| func (f Form) firstBoundary(src input, nsrc int) int { | ||||
| 	i := src.skipContinuationBytes(0) | ||||
| 	if i >= nsrc { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	fd := formTable[f] | ||||
| 	ss := streamSafe(0) | ||||
| 	// We should call ss.first here, but we can't as the first rune is | ||||
| 	// skipped already. This means FirstBoundary can't really determine | ||||
| 	// CGJ insertion points correctly. Luckily it doesn't have to. | ||||
| 	for { | ||||
| 		info := fd.info(src, i) | ||||
| 		if info.size == 0 { | ||||
| 			return -1 | ||||
| 		} | ||||
| 		if s := ss.next(info); s != ssSuccess { | ||||
| 			return i | ||||
| 		} | ||||
| 		i += int(info.size) | ||||
| 		if i >= nsrc { | ||||
| 			if !info.BoundaryAfter() && !ss.isMax() { | ||||
| 				return -1 | ||||
| 			} | ||||
| 			return nsrc | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // FirstBoundaryInString returns the position i of the first boundary in s | ||||
| // or -1 if s contains no boundary. | ||||
| func (f Form) FirstBoundaryInString(s string) int { | ||||
| 	return f.firstBoundary(inputString(s), len(s)) | ||||
| } | ||||
|  | ||||
| // NextBoundary reports the index of the boundary between the first and next | ||||
| // segment in b or -1 if atEOF is false and there are not enough bytes to | ||||
| // determine this boundary. | ||||
| func (f Form) NextBoundary(b []byte, atEOF bool) int { | ||||
| 	return f.nextBoundary(inputBytes(b), len(b), atEOF) | ||||
| } | ||||
|  | ||||
| // NextBoundaryInString reports the index of the boundary between the first and | ||||
| // next segment in b or -1 if atEOF is false and there are not enough bytes to | ||||
| // determine this boundary. | ||||
| func (f Form) NextBoundaryInString(s string, atEOF bool) int { | ||||
| 	return f.nextBoundary(inputString(s), len(s), atEOF) | ||||
| } | ||||
|  | ||||
| func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { | ||||
| 	if nsrc == 0 { | ||||
| 		if atEOF { | ||||
| 			return 0 | ||||
| 		} | ||||
| 		return -1 | ||||
| 	} | ||||
| 	fd := formTable[f] | ||||
| 	info := fd.info(src, 0) | ||||
| 	if info.size == 0 { | ||||
| 		if atEOF { | ||||
| 			return 1 | ||||
| 		} | ||||
| 		return -1 | ||||
| 	} | ||||
| 	ss := streamSafe(0) | ||||
| 	ss.first(info) | ||||
|  | ||||
| 	for i := int(info.size); i < nsrc; i += int(info.size) { | ||||
| 		info = fd.info(src, i) | ||||
| 		if info.size == 0 { | ||||
| 			if atEOF { | ||||
| 				return i | ||||
| 			} | ||||
| 			return -1 | ||||
| 		} | ||||
| 		// TODO: Using streamSafe to determine the boundary isn't the same as | ||||
| 		// using BoundaryBefore. Determine which should be used. | ||||
| 		if s := ss.next(info); s != ssSuccess { | ||||
| 			return i | ||||
| 		} | ||||
| 	} | ||||
| 	if !atEOF && !info.BoundaryAfter() && !ss.isMax() { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	return nsrc | ||||
| } | ||||
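|  | ||||
| // Editor's note: an illustrative sketch, not part of the upstream package. | ||||
| // NextBoundary can be used to split a complete byte slice into segments | ||||
| // that may be normalized independently. | ||||
| func exampleSplitSegments(b []byte) [][]byte { | ||||
| 	var segs [][]byte | ||||
| 	for len(b) > 0 { | ||||
| 		n := NFC.NextBoundary(b, true) // atEOF: b holds all remaining input | ||||
| 		segs = append(segs, b[:n]) | ||||
| 		b = b[n:] | ||||
| 	} | ||||
| 	return segs | ||||
| } | ||||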
|  | ||||
| // LastBoundary returns the position i of the last boundary in b | ||||
| // or -1 if b contains no boundary. | ||||
| func (f Form) LastBoundary(b []byte) int { | ||||
| 	return lastBoundary(formTable[f], b) | ||||
| } | ||||
|  | ||||
| func lastBoundary(fd *formInfo, b []byte) int { | ||||
| 	i := len(b) | ||||
| 	info, p := lastRuneStart(fd, b) | ||||
| 	if p == -1 { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	if info.size == 0 { // ends with incomplete rune | ||||
| 		if p == 0 { // starts with incomplete rune | ||||
| 			return -1 | ||||
| 		} | ||||
| 		i = p | ||||
| 		info, p = lastRuneStart(fd, b[:i]) | ||||
| 		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter | ||||
| 			return i | ||||
| 		} | ||||
| 	} | ||||
| 	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8 | ||||
| 		return i | ||||
| 	} | ||||
| 	if info.BoundaryAfter() { | ||||
| 		return i | ||||
| 	} | ||||
| 	ss := streamSafe(0) | ||||
| 	v := ss.backwards(info) | ||||
| 	for i = p; i >= 0 && v != ssStarter; i = p { | ||||
| 		info, p = lastRuneStart(fd, b[:i]) | ||||
| 		if v = ss.backwards(info); v == ssOverflow { | ||||
| 			break | ||||
| 		} | ||||
| 		if p+int(info.size) != i { | ||||
| 			if p == -1 { // no boundary found | ||||
| 				return -1 | ||||
| 			} | ||||
| 			return i // boundary after an illegal UTF-8 encoding | ||||
| 		} | ||||
| 	} | ||||
| 	return i | ||||
| } | ||||
|  | ||||
| // decomposeSegment scans the first segment in src into rb. It inserts 0x034f | ||||
| // (Combining Grapheme Joiner) when it encounters a sequence of more than 30 | ||||
| // non-starters and returns the position in src up to which data was consumed, | ||||
| // or a negative error value (iShortDst or iShortSrc). | ||||
| func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int { | ||||
| 	// Force one character to be consumed. | ||||
| 	info := rb.f.info(rb.src, sp) | ||||
| 	if info.size == 0 { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	if s := rb.ss.next(info); s == ssStarter { | ||||
| 		// TODO: this could be removed if we don't support merging. | ||||
| 		if rb.nrune > 0 { | ||||
| 			goto end | ||||
| 		} | ||||
| 	} else if s == ssOverflow { | ||||
| 		rb.insertCGJ() | ||||
| 		goto end | ||||
| 	} | ||||
| 	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { | ||||
| 		return int(err) | ||||
| 	} | ||||
| 	for { | ||||
| 		sp += int(info.size) | ||||
| 		if sp >= rb.nsrc { | ||||
| 			if !atEOF && !info.BoundaryAfter() { | ||||
| 				return int(iShortSrc) | ||||
| 			} | ||||
| 			break | ||||
| 		} | ||||
| 		info = rb.f.info(rb.src, sp) | ||||
| 		if info.size == 0 { | ||||
| 			if !atEOF { | ||||
| 				return int(iShortSrc) | ||||
| 			} | ||||
| 			break | ||||
| 		} | ||||
| 		if s := rb.ss.next(info); s == ssStarter { | ||||
| 			break | ||||
| 		} else if s == ssOverflow { | ||||
| 			rb.insertCGJ() | ||||
| 			break | ||||
| 		} | ||||
| 		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { | ||||
| 			return int(err) | ||||
| 		} | ||||
| 	} | ||||
| end: | ||||
| 	if !rb.doFlush() { | ||||
| 		return int(iShortDst) | ||||
| 	} | ||||
| 	return sp | ||||
| } | ||||
|  | ||||
| // lastRuneStart returns the runeInfo and position of the last | ||||
| // rune in buf or the zero runeInfo and -1 if no rune was found. | ||||
| func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) { | ||||
| 	p := len(buf) - 1 | ||||
| 	for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- { | ||||
| 	} | ||||
| 	if p < 0 { | ||||
| 		return Properties{}, -1 | ||||
| 	} | ||||
| 	return fd.info(inputBytes(buf), p), p | ||||
| } | ||||
|  | ||||
| // decomposeToLastBoundary finds an open segment at the end of the buffer | ||||
| // and scans it into rb. Returns the buffer minus the last segment. | ||||
| func decomposeToLastBoundary(rb *reorderBuffer) { | ||||
| 	fd := &rb.f | ||||
| 	info, i := lastRuneStart(fd, rb.out) | ||||
| 	if int(info.size) != len(rb.out)-i { | ||||
| 		// illegal trailing continuation bytes | ||||
| 		return | ||||
| 	} | ||||
| 	if info.BoundaryAfter() { | ||||
| 		return | ||||
| 	} | ||||
| 	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order | ||||
| 	padd := 0 | ||||
| 	ss := streamSafe(0) | ||||
| 	p := len(rb.out) | ||||
| 	for { | ||||
| 		add[padd] = info | ||||
| 		v := ss.backwards(info) | ||||
| 		if v == ssOverflow { | ||||
| 			// Note that if we have an overflow, the string we are appending to | ||||
| 			// is not correctly normalized. In this case the behavior is undefined. | ||||
| 			break | ||||
| 		} | ||||
| 		padd++ | ||||
| 		p -= int(info.size) | ||||
| 		if v == ssStarter || p < 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		info, i = lastRuneStart(fd, rb.out[:p]) | ||||
| 		if int(info.size) != p-i { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	rb.ss = ss | ||||
| 	// Copy bytes for insertion as we may need to overwrite rb.out. | ||||
| 	var buf [maxBufferSize * utf8.UTFMax]byte | ||||
| 	cp := buf[:copy(buf[:], rb.out[p:])] | ||||
| 	rb.out = rb.out[:p] | ||||
| 	for padd--; padd >= 0; padd-- { | ||||
| 		info = add[padd] | ||||
| 		rb.insertUnsafe(inputBytes(cp), 0, info) | ||||
| 		cp = cp[info.size:] | ||||
| 	} | ||||
| } | ||||
							
								
								
									
125 vendor/golang.org/x/text/unicode/norm/readwriter.go generated vendored Normal file
							| @@ -0,0 +1,125 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "io" | ||||
|  | ||||
| type normWriter struct { | ||||
| 	rb  reorderBuffer | ||||
| 	w   io.Writer | ||||
| 	buf []byte | ||||
| } | ||||
|  | ||||
| // Write implements the standard write interface.  If the last characters are | ||||
| // not at a normalization boundary, the bytes will be buffered for the next | ||||
| // write. The remaining bytes will be written on close. | ||||
| func (w *normWriter) Write(data []byte) (n int, err error) { | ||||
| 	// Process data in pieces to keep w.buf size bounded. | ||||
| 	const chunk = 4000 | ||||
|  | ||||
| 	for len(data) > 0 { | ||||
| 		// Normalize into w.buf. | ||||
| 		m := len(data) | ||||
| 		if m > chunk { | ||||
| 			m = chunk | ||||
| 		} | ||||
| 		w.rb.src = inputBytes(data[:m]) | ||||
| 		w.rb.nsrc = m | ||||
| 		w.buf = doAppend(&w.rb, w.buf, 0) | ||||
| 		data = data[m:] | ||||
| 		n += m | ||||
|  | ||||
| 		// Write out complete prefix, save remainder. | ||||
| 		// Note that lastBoundary looks back at most 31 runes. | ||||
| 		i := lastBoundary(&w.rb.f, w.buf) | ||||
| 		if i == -1 { | ||||
| 			i = 0 | ||||
| 		} | ||||
| 		if i > 0 { | ||||
| 			if _, err = w.w.Write(w.buf[:i]); err != nil { | ||||
| 				break | ||||
| 			} | ||||
| 			bn := copy(w.buf, w.buf[i:]) | ||||
| 			w.buf = w.buf[:bn] | ||||
| 		} | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // Close forces data that remains in the buffer to be written. | ||||
| func (w *normWriter) Close() error { | ||||
| 	if len(w.buf) > 0 { | ||||
| 		_, err := w.w.Write(w.buf) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Writer returns a new writer that implements Write(b) | ||||
| // by writing f(b) to w. The returned writer may use an | ||||
| // internal buffer to maintain state across Write calls. | ||||
| // Calling its Close method writes any buffered data to w. | ||||
| func (f Form) Writer(w io.Writer) io.WriteCloser { | ||||
| 	wr := &normWriter{rb: reorderBuffer{}, w: w} | ||||
| 	wr.rb.init(f, nil) | ||||
| 	return wr | ||||
| } | ||||
|  | ||||
| type normReader struct { | ||||
| 	rb           reorderBuffer | ||||
| 	r            io.Reader | ||||
| 	inbuf        []byte | ||||
| 	outbuf       []byte | ||||
| 	bufStart     int | ||||
| 	lastBoundary int | ||||
| 	err          error | ||||
| } | ||||
|  | ||||
| // Read implements the standard read interface. | ||||
| func (r *normReader) Read(p []byte) (int, error) { | ||||
| 	for { | ||||
| 		if r.lastBoundary-r.bufStart > 0 { | ||||
| 			n := copy(p, r.outbuf[r.bufStart:r.lastBoundary]) | ||||
| 			r.bufStart += n | ||||
| 			if r.lastBoundary-r.bufStart > 0 { | ||||
| 				return n, nil | ||||
| 			} | ||||
| 			return n, r.err | ||||
| 		} | ||||
| 		if r.err != nil { | ||||
| 			return 0, r.err | ||||
| 		} | ||||
| 		outn := copy(r.outbuf, r.outbuf[r.lastBoundary:]) | ||||
| 		r.outbuf = r.outbuf[0:outn] | ||||
| 		r.bufStart = 0 | ||||
|  | ||||
| 		n, err := r.r.Read(r.inbuf) | ||||
| 		r.rb.src = inputBytes(r.inbuf[0:n]) | ||||
| 		r.rb.nsrc, r.err = n, err | ||||
| 		if n > 0 { | ||||
| 			r.outbuf = doAppend(&r.rb, r.outbuf, 0) | ||||
| 		} | ||||
| 		if err == io.EOF { | ||||
| 			r.lastBoundary = len(r.outbuf) | ||||
| 		} else { | ||||
| 			r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf) | ||||
| 			if r.lastBoundary == -1 { | ||||
| 				r.lastBoundary = 0 | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Reader returns a new reader that implements Read | ||||
| // by reading data from r and returning f(data). | ||||
| func (f Form) Reader(r io.Reader) io.Reader { | ||||
| 	const chunk = 4000 | ||||
| 	buf := make([]byte, chunk) | ||||
| 	rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf} | ||||
| 	rr.rb.init(f, buf) | ||||
| 	return rr | ||||
| } | ||||
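The two types above give Form a streaming interface. As a minimal usage sketch (an editorial illustration, not part of the vendored file; it only assumes the import path golang.org/x/text/unicode/norm added by this commit): norm.NFC.Writer wraps an io.Writer so that data written through it is emitted in NFC, and norm.NFC.Reader normalizes data as it is read. Close must be called on the writer so the trailing segment, which may not yet end at a boundary, is flushed.

	package main

	import (
		"io"
		"os"
		"strings"

		"golang.org/x/text/unicode/norm"
	)

	func main() {
		// Writer side: bytes pass through the reorder buffer and reach os.Stdout in NFC.
		w := norm.NFC.Writer(os.Stdout)
		io.WriteString(w, "cafe\u0301\n") // 'e' followed by a combining acute accent
		w.Close()                         // flushes the open trailing segment as U+00E9

		// Reader side: data is normalized before it is handed to the caller.
		r := norm.NFC.Reader(strings.NewReader("cafe\u0301\n"))
		io.Copy(os.Stdout, r)
	}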
7657  vendor/golang.org/x/text/unicode/norm/tables10.0.0.go  generated  vendored  Normal file  (file diff suppressed because it is too large)
7693  vendor/golang.org/x/text/unicode/norm/tables11.0.0.go  generated  vendored  Normal file  (file diff suppressed because it is too large)
7710  vendor/golang.org/x/text/unicode/norm/tables12.0.0.go  generated  vendored  Normal file  (file diff suppressed because it is too large)
7760  vendor/golang.org/x/text/unicode/norm/tables13.0.0.go  generated  vendored  Normal file  (file diff suppressed because it is too large)
7637  vendor/golang.org/x/text/unicode/norm/tables9.0.0.go  generated  vendored  Normal file  (file diff suppressed because it is too large)
88  vendor/golang.org/x/text/unicode/norm/transform.go  generated  vendored  Normal file
							| @@ -0,0 +1,88 @@ | ||||
| // Copyright 2013 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import ( | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"golang.org/x/text/transform" | ||||
| ) | ||||
|  | ||||
| // Reset implements the Reset method of the transform.Transformer interface. | ||||
| func (Form) Reset() {} | ||||
|  | ||||
| // Transform implements the Transform method of the transform.Transformer | ||||
| // interface. It may need to write segments of up to MaxSegmentSize at once. | ||||
| // Users should either catch ErrShortDst and allow dst to grow or have dst be at | ||||
| // least of size MaxTransformChunkSize to be guaranteed of progress. | ||||
| func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	// Cap the maximum number of src bytes to check. | ||||
| 	b := src | ||||
| 	eof := atEOF | ||||
| 	if ns := len(dst); ns < len(b) { | ||||
| 		err = transform.ErrShortDst | ||||
| 		eof = false | ||||
| 		b = b[:ns] | ||||
| 	} | ||||
| 	i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof) | ||||
| 	n := copy(dst, b[:i]) | ||||
| 	if !ok { | ||||
| 		nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF) | ||||
| 		return nDst + n, nSrc + n, err | ||||
| 	} | ||||
|  | ||||
| 	if err == nil && n < len(src) && !atEOF { | ||||
| 		err = transform.ErrShortSrc | ||||
| 	} | ||||
| 	return n, n, err | ||||
| } | ||||
|  | ||||
| func flushTransform(rb *reorderBuffer) bool { | ||||
| 	// Write out (must fully fit in dst, or else it is an ErrShortDst). | ||||
| 	if len(rb.out) < rb.nrune*utf8.UTFMax { | ||||
| 		return false | ||||
| 	} | ||||
| 	rb.out = rb.out[rb.flushCopy(rb.out):] | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc} | ||||
|  | ||||
| // transform implements the transform.Transformer interface. It is only called | ||||
| // when quickSpan does not pass for a given string. | ||||
| func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	// TODO: get rid of reorderBuffer. See CL 23460044. | ||||
| 	rb := reorderBuffer{} | ||||
| 	rb.init(f, src) | ||||
| 	for { | ||||
| 		// Load segment into reorder buffer. | ||||
| 		rb.setFlusher(dst[nDst:], flushTransform) | ||||
| 		end := decomposeSegment(&rb, nSrc, atEOF) | ||||
| 		if end < 0 { | ||||
| 			return nDst, nSrc, errs[-end] | ||||
| 		} | ||||
| 		nDst = len(dst) - len(rb.out) | ||||
| 		nSrc = end | ||||
|  | ||||
| 		// Next quickSpan. | ||||
| 		end = rb.nsrc | ||||
| 		eof := atEOF | ||||
| 		if n := nSrc + len(dst) - nDst; n < end { | ||||
| 			err = transform.ErrShortDst | ||||
| 			end = n | ||||
| 			eof = false | ||||
| 		} | ||||
| 		end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof) | ||||
| 		n := copy(dst[nDst:], rb.src.bytes[nSrc:end]) | ||||
| 		nSrc += n | ||||
| 		nDst += n | ||||
| 		if ok { | ||||
| 			if err == nil && n < rb.nsrc && !atEOF { | ||||
| 				err = transform.ErrShortSrc | ||||
| 			} | ||||
| 			return nDst, nSrc, err | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
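Because Form provides Reset and Transform (above), a Form value satisfies the transform.Transformer interface and can be driven through the helpers in golang.org/x/text/transform. A short sketch under that assumption (illustrative, not part of the vendored file); transform.String grows the destination as needed, so ErrShortDst is handled internally:

	package main

	import (
		"fmt"

		"golang.org/x/text/transform"
		"golang.org/x/text/unicode/norm"
	)

	func main() {
		// Compose "e" + U+0301 into the single precomposed rune U+00E9.
		out, _, err := transform.String(norm.NFC, "cafe\u0301")
		if err != nil {
			panic(err)
		}
		fmt.Printf("%q -> %q\n", "cafe\u0301", out)
	}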
54  vendor/golang.org/x/text/unicode/norm/trie.go  generated  vendored  Normal file
							| @@ -0,0 +1,54 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| type valueRange struct { | ||||
| 	value  uint16 // header: value:stride | ||||
| 	lo, hi byte   // header: lo:n | ||||
| } | ||||
|  | ||||
| type sparseBlocks struct { | ||||
| 	values []valueRange | ||||
| 	offset []uint16 | ||||
| } | ||||
|  | ||||
| var nfcSparse = sparseBlocks{ | ||||
| 	values: nfcSparseValues[:], | ||||
| 	offset: nfcSparseOffset[:], | ||||
| } | ||||
|  | ||||
| var nfkcSparse = sparseBlocks{ | ||||
| 	values: nfkcSparseValues[:], | ||||
| 	offset: nfkcSparseOffset[:], | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	nfcData  = newNfcTrie(0) | ||||
| 	nfkcData = newNfkcTrie(0) | ||||
| ) | ||||
|  | ||||
| // lookupValue determines the type of block n and looks up the value for b. | ||||
| // For n < t.cutoff, the block is a simple lookup table. Otherwise, the block | ||||
| // is a list of ranges with an accompanying value. Given a matching range r, | ||||
| // the value for b is given by r.value + (b - r.lo) * stride. | ||||
| func (t *sparseBlocks) lookup(n uint32, b byte) uint16 { | ||||
| 	offset := t.offset[n] | ||||
| 	header := t.values[offset] | ||||
| 	lo := offset + 1 | ||||
| 	hi := lo + uint16(header.lo) | ||||
| 	for lo < hi { | ||||
| 		m := lo + (hi-lo)/2 | ||||
| 		r := t.values[m] | ||||
| 		if r.lo <= b && b <= r.hi { | ||||
| 			return r.value + uint16(b-r.lo)*header.value | ||||
| 		} | ||||
| 		if b < r.lo { | ||||
| 			hi = m | ||||
| 		} else { | ||||
| 			lo = m + 1 | ||||
| 		} | ||||
| 	} | ||||
| 	return 0 | ||||
| } | ||||
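For reference, the lookup above is a binary search over sorted byte ranges: the block header packs the range count (header.lo) and the stride (header.value), and a byte b inside a matching range r maps to r.value + (b-r.lo)*stride. A standalone sketch of the same idea with made-up data (the block contents below are illustrative, not taken from the generated tables):

	package main

	import "fmt"

	type valueRange struct {
		value  uint16 // header entry: stride; range entry: base value
		lo, hi byte   // header entry: lo is the number of ranges
	}

	// rangeLookup mirrors sparseBlocks.lookup for a single block.
	func rangeLookup(block []valueRange, b byte) uint16 {
		header := block[0]
		lo, hi := 1, 1+int(header.lo)
		for lo < hi {
			m := lo + (hi-lo)/2
			r := block[m]
			switch {
			case r.lo <= b && b <= r.hi:
				return r.value + uint16(b-r.lo)*header.value
			case b < r.lo:
				hi = m
			default:
				lo = m + 1
			}
		}
		return 0 // no range matched
	}

	func main() {
		// Header: stride 1, two ranges; [0x80,0x8f] starts at 100, [0xa0,0xaf] at 200.
		block := []valueRange{
			{value: 1, lo: 2},
			{value: 100, lo: 0x80, hi: 0x8f},
			{value: 200, lo: 0xa0, hi: 0xaf},
		}
		fmt.Println(rangeLookup(block, 0x82)) // 102
		fmt.Println(rangeLookup(block, 0x90)) // 0 (falls between ranges)
	}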