charset

package

Version: v0.22.0 (latest) | Published: Apr 29, 2025 | License: Apache-2.0 | Imports: 11 | Imported by: 8

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/vitessio/vitess

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func Collapse(dst []byte, src []rune, dstCharset Charset) []byte
func Convert(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]byte, error)
func ConvertFromBinary(dst []byte, dstCharset Charset, src []byte) ([]byte, error)
func ConvertFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error)
func Expand(dst []rune, src []byte, srcCharset Charset) []rune
func IsBackslashSafe(charset Charset) bool
func IsMultibyteByName(csname string) bool
func IsUnicode(charset Charset) bool
func IsUnicodeByName(csname string) bool
func Length(charset Charset, input []byte) int
func Slice(charset Charset, input []byte, from, to int) []byte
func TablenameToFilename(name string) string
func Validate(charset Charset, input []byte) bool
type Charset
type Charset_8bit
type Charset_binary
type Charset_cp932
type Charset_eucjpms
type Charset_euckr
type Charset_gb18030
type Charset_gb2312
type Charset_latin1
type Charset_sjis
type Charset_ucs2
type Charset_ujis
type Charset_utf16
type Charset_utf16le
type Charset_utf32
type Charset_utf8mb3
type Charset_utf8mb4
type Convertible
type UnicodeMapping

Constants ¶

View Source

const RuneError = utf8.RuneError

Variables ¶

This section is empty.

Functions ¶

func Collapse ¶ added in v0.18.0

func Collapse(dst []byte, src []rune, dstCharset Charset) []byte

func Convert ¶

func Convert(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]byte, error)

Convert re-encodes `src` from the Charset `srcCharset` to the Charset `dstCharset`. The result is appended to `dst` if `dst` is not nil; otherwise a new byte slice is allocated to store the result.

func ConvertFromBinary ¶

func ConvertFromBinary(dst []byte, dstCharset Charset, src []byte) ([]byte, error)

func ConvertFromUTF8 ¶

func ConvertFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error)

func Expand ¶ added in v0.18.0

func Expand(dst []rune, src []byte, srcCharset Charset) []rune

func IsBackslashSafe ¶

func IsBackslashSafe(charset Charset) bool

func IsMultibyteByName ¶

func IsMultibyteByName(csname string) bool

func IsUnicode ¶

func IsUnicode(charset Charset) bool

func IsUnicodeByName ¶

func IsUnicodeByName(csname string) bool

func Length ¶

func Length(charset Charset, input []byte) int

func Slice ¶

func Slice(charset Charset, input []byte, from, to int) []byte

func TablenameToFilename ¶ added in v0.22.0

func TablenameToFilename(name string) string

TablenameToFilename is a rewrite of MySQL's `tablename_to_filename` utility function. InnoDB table names take the form schema_name/table_name, except that each of the two tokens is encoded using this function. Simple characters are left unchanged, but special characters are encoded. For example, the table `tbl-fts` in the `test` database is encoded as `test/tbl@002dfts`.

Original encoding function:

  https://github.com/mysql/mysql-server/blob/89e1c722476deebc3ddc8675e779869f6da654c0/strings/ctype-utf8.cc#L6961-L6984

			static int my_wc_mb_filename(const CHARSET_INFO *cs [[maybe_unused]],
																	my_wc_t wc, uchar *s, uchar *e) {
				int code;
				char hex[] = "0123456789abcdef";

				if (s >= e) return MY_CS_TOOSMALL;

				if (wc < 128 && filename_safe_char[wc]) {
					*s = (uchar)wc;
					return 1;
				}

				if (s + 3 > e) return MY_CS_TOOSMALL3;

				*s++ = MY_FILENAME_ESCAPE;
				if ((wc >= 0x00C0 && wc <= 0x05FF && (code = uni_0C00_05FF[wc - 0x00C0])) ||
						(wc >= 0x1E00 && wc <= 0x1FFF && (code = uni_1E00_1FFF[wc - 0x1E00])) ||
						(wc >= 0x2160 && wc <= 0x217F && (code = uni_2160_217F[wc - 0x2160])) ||
						(wc >= 0x24B0 && wc <= 0x24EF && (code = uni_24B0_24EF[wc - 0x24B0])) ||
						(wc >= 0xFF20 && wc <= 0xFF5F && (code = uni_FF20_FF5F[wc - 0xFF20]))) {
					*s++ = (code / 80) + 0x30;
					*s++ = (code % 80) + 0x30;
					return 3;
				}

				/* Non letter */
				if (s + 5 > e) return MY_CS_TOOSMALL5;

				*s++ = hex[(wc >> 12) & 15];
				*s++ = hex[(wc >> 8) & 15];
				*s++ = hex[(wc >> 4) & 15];
				*s++ = hex[(wc)&15];
				return 5;
			}

func Validate ¶

func Validate(charset Charset, input []byte) bool

Types ¶

type Charset ¶

type Charset = types.Charset

type Charset_8bit ¶

type Charset_8bit = eightbit.Charset_8bit

type Charset_binary ¶

type Charset_binary = eightbit.Charset_binary

type Charset_cp932 ¶

type Charset_cp932 = japanese.Charset_cp932

type Charset_eucjpms ¶

type Charset_eucjpms = japanese.Charset_eucjpms

type Charset_euckr ¶

type Charset_euckr = korean.Charset_euckr

type Charset_gb18030 ¶

type Charset_gb18030 = simplifiedchinese.Charset_gb18030

type Charset_gb2312 ¶

type Charset_gb2312 = simplifiedchinese.Charset_gb2312

type Charset_latin1 ¶

type Charset_latin1 = eightbit.Charset_latin1

type Charset_sjis ¶

type Charset_sjis = japanese.Charset_sjis

type Charset_ucs2 ¶

type Charset_ucs2 = unicode.Charset_ucs2

type Charset_ujis ¶

type Charset_ujis = japanese.Charset_ujis

type Charset_utf16 ¶

type Charset_utf16 = unicode.Charset_utf16be

type Charset_utf16le ¶

type Charset_utf16le = unicode.Charset_utf16le

type Charset_utf32 ¶

type Charset_utf32 = unicode.Charset_utf32

type Charset_utf8mb3 ¶

type Charset_utf8mb3 = unicode.Charset_utf8mb3

type Charset_utf8mb4 ¶

type Charset_utf8mb4 = unicode.Charset_utf8mb4

type Convertible ¶

type Convertible interface {
	Charset
	Convert(dst, src []byte, from Charset) ([]byte, error)
}

type UnicodeMapping ¶

type UnicodeMapping = eightbit.UnicodeMapping

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
eightbit
japanese
korean	Package korean provides Korean encodings such as EUC-KR.
simplifiedchinese	Package simplifiedchinese provides Simplified Chinese encodings such as GBK.
types
unicode

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL