diff options
Diffstat (limited to 'unicode.go')
| -rw-r--r-- | unicode.go | 28 |
1 files changed, 28 insertions, 0 deletions
// detectUTF8 scans s one rune at a time and returns two flags: valid is true
// when s is well-formed UTF-8, and require is true when s contains at least
// one character that is not portable across CP-437, ASCII, and other common
// encodings, meaning the string must be treated as UTF-8.
//
// If s is not valid UTF-8, both results are false.
func detectUTF8(s string) (valid, require bool) {
	rest := s
	for len(rest) > 0 {
		r, n := utf8.DecodeRuneInString(rest)
		rest = rest[n:]

		// ZIP officially specifies CP-437, yet many readers decode names
		// using the system's local character encoding. Most encodings agree
		// with a large, ASCII-like subset of CP-437, so characters inside
		// that shared printable range need no further inspection.
		//
		// 0x5c and 0x7e are left out of the portable range because EUC-KR
		// and Shift-JIS replace them with localized currency and overline
		// characters.
		if r >= 0x20 && r <= 0x7d && r != 0x5c {
			continue
		}

		// A RuneError with width 1 marks an undecodable byte, as opposed to
		// a U+FFFD that is genuinely encoded in s.
		if r == utf8.RuneError && n == 1 {
			return false, false
		}
		if !utf8.ValidRune(r) {
			return false, false
		}
		require = true
	}
	return true, require
}
