// ScanUTF8Sequences is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
//
// Each token is the raw bytes of a single sequence, sliced directly from
// data. A byte that cannot begin a valid sequence is returned as a one-byte
// token rather than being replaced, so no input bytes are lost or rewritten.
//
// When atEOF is false and data ends partway through a multi-byte sequence,
// the function returns (0, nil, nil) to request more input. The returned
// error is always nil.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
	if len(data) == 0 {
		return 0, nil, nil
	}

	// Only ask for more input when the buffer stops in the middle of a
	// sequence. Comparing the decoded rune against utf8.RuneError is not
	// enough to detect that: a correctly encoded U+FFFD and an outright
	// invalid byte both decode to RuneError, yet neither can be repaired by
	// reading further, so waiting on them would stall the scanner.
	if !atEOF && !utf8.FullRune(data) {
		return 0, nil, nil
	}

	// At this point the prefix is either a complete sequence or an encoding
	// error (including a truncated sequence at EOF); DecodeRune reports a
	// width of 1 for every error case, so the scanner always makes progress.
	_, seqLen := utf8.DecodeRune(data)
	return seqLen, data[:seqLen], nil
}