003 File Manager
Current Path:
/usr/src/contrib/subversion/subversion/libsvn_subr
usr
/
src
/
contrib
/
subversion
/
subversion
/
libsvn_subr
/
📁
..
📄
adler32.c
(3.16 KB)
📄
atomic.c
(7.23 KB)
📄
auth.c
(32.4 KB)
📄
auth.h
(6.1 KB)
📄
base64.c
(19.18 KB)
📄
bit_array.c
(6.26 KB)
📄
cache-inprocess.c
(21.2 KB)
📄
cache-membuffer.c
(116.43 KB)
📄
cache-memcache.c
(17.35 KB)
📄
cache-null.c
(4.35 KB)
📄
cache.c
(10.22 KB)
📄
cache.h
(4.03 KB)
📄
cache_config.c
(6.01 KB)
📄
checksum.c
(24.17 KB)
📄
checksum.h
(2.32 KB)
📄
cmdline.c
(58.99 KB)
📄
compat.c
(5.28 KB)
📄
compress_lz4.c
(4.51 KB)
📄
compress_zlib.c
(6.85 KB)
📄
config.c
(39.66 KB)
📄
config_auth.c
(8.65 KB)
📄
config_file.c
(70.25 KB)
📄
config_impl.h
(6.15 KB)
📄
config_keys.inc
(2.65 KB)
📄
config_win.c
(9.61 KB)
📄
crypto.c
(25.19 KB)
📄
crypto.h
(5.43 KB)
📄
ctype.c
(13.81 KB)
📄
date.c
(12.86 KB)
📄
debug.c
(3.77 KB)
📄
deprecated.c
(64.3 KB)
📄
dirent_uri.c
(80.17 KB)
📄
dirent_uri.h
(1.43 KB)
📄
dso.c
(3.74 KB)
📄
encode.c
(2.69 KB)
📄
eol.c
(2.79 KB)
📄
error.c
(24.52 KB)
📄
errorcode.inc
(4.27 KB)
📄
fnv1a.c
(6.92 KB)
📄
fnv1a.h
(3.04 KB)
📄
genctype.py
(3.84 KB)
📄
gpg_agent.c
(22.29 KB)
📄
hash.c
(19.59 KB)
📄
internal_statements.h
(1.87 KB)
📄
internal_statements.sql
(1.58 KB)
📄
io.c
(182.6 KB)
📄
iter.c
(3.84 KB)
📄
libsvn_subr.pc.in
(515 B)
📄
lock.c
(1.69 KB)
📄
log.c
(14 KB)
📁
lz4
📄
macos_keychain.c
(9.7 KB)
📄
magic.c
(5.25 KB)
📄
md5.c
(1.8 KB)
📄
mergeinfo.c
(89.24 KB)
📄
mutex.c
(3.06 KB)
📄
nls.c
(3.06 KB)
📄
object_pool.c
(9.76 KB)
📄
opt.c
(39.34 KB)
📄
opt.h
(1.89 KB)
📄
packed_data.c
(33.43 KB)
📄
path.c
(35.99 KB)
📄
pool.c
(4.35 KB)
📄
pools.h
(1.45 KB)
📄
prefix_string.c
(10.65 KB)
📄
prompt.c
(28.82 KB)
📄
properties.c
(13.59 KB)
📄
quoprint.c
(8.96 KB)
📄
root_pools.c
(3.15 KB)
📄
simple_providers.c
(25.54 KB)
📄
skel.c
(23.04 KB)
📄
sorts.c
(15.9 KB)
📄
spillbuf.c
(19.78 KB)
📄
sqlite.c
(49.68 KB)
📄
sqlite3wrapper.c
(3.16 KB)
📄
ssl_client_cert_providers.c
(6.66 KB)
📄
ssl_client_cert_pw_providers.c
(18.93 KB)
📄
ssl_server_trust_providers.c
(7.77 KB)
📄
stream.c
(65.69 KB)
📄
string.c
(40.2 KB)
📄
subst.c
(67.57 KB)
📄
sysinfo.c
(43.67 KB)
📄
sysinfo.h
(2.58 KB)
📄
target.c
(11.58 KB)
📄
temp_serializer.c
(14.02 KB)
📄
time.c
(9.08 KB)
📄
token.c
(2.58 KB)
📄
types.c
(9.43 KB)
📄
user.c
(2.66 KB)
📄
username_providers.c
(9.2 KB)
📄
utf.c
(40.38 KB)
📁
utf8proc
📄
utf8proc.c
(20.71 KB)
📄
utf_validate.c
(13.3 KB)
📄
utf_width.c
(10.85 KB)
📄
validate.c
(3.35 KB)
📄
version.c
(9.89 KB)
📄
win32_crashrpt.c
(25.06 KB)
📄
win32_crashrpt.h
(1.35 KB)
📄
win32_crashrpt_dll.h
(4 KB)
📄
win32_crypto.c
(18.12 KB)
📄
win32_xlate.c
(7.3 KB)
📄
win32_xlate.h
(2.11 KB)
📄
x509.h
(3.75 KB)
📄
x509info.c
(9.53 KB)
📄
x509parse.c
(35.77 KB)
📄
xml.c
(20.01 KB)
Editing: utf_validate.c
/* * utf_validate.c: Validate a UTF-8 string * * ==================================================================== * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * ==================================================================== */ /* Validate a UTF-8 string according to the rules in * * Table 3-6. Well-Formed UTF-8 Bytes Sequences * * in * * The Unicode Standard, Version 4.0 * * which is available at * * http://www.unicode.org/ * * UTF-8 was originally defined in RFC-2279, Unicode's "well-formed UTF-8" * is a subset of that enconding. The Unicode enconding prohibits things * like non-shortest encodings (some characters can be represented by more * than one multi-byte encoding) and the encodings for the surrogate code * points. RFC-3629 superceeds RFC-2279 and adopts the same well-formed * rules as Unicode. This is the ABNF in RFC-3629 that describes * well-formed UTF-8 rules: * * UTF8-octets = *( UTF8-char ) * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 * UTF8-1 = %x00-7F * UTF8-2 = %xC2-DF UTF8-tail * UTF8-3 = %xE0 %xA0-BF UTF8-tail / * %xE1-EC 2( UTF8-tail ) / * %xED %x80-9F UTF8-tail / * %xEE-EF 2( UTF8-tail ) * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / * %xF1-F3 3( UTF8-tail ) / * %xF4 %x80-8F 2( UTF8-tail ) * UTF8-tail = %x80-BF * */ #include "private/svn_utf_private.h" #include "private/svn_eol_private.h" #include "private/svn_dep_compat.h" /* Lookup table to categorise each octet in the string. */ static const char octet_category[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x7f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xa0-0xbf */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, /* 0xc0-0xc1 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, /* 0xc2-0xdf */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, /* 0xe0 */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 0xe1-0xec */ 8, /* 0xed */ 9, 9, /* 0xee-0xef */ 10, /* 0xf0 */ 11, 11, 11, /* 0xf1-0xf3 */ 12, /* 0xf4 */ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 /* 0xf5-0xff */ }; /* Machine states */ #define FSM_START 0 #define FSM_80BF 1 #define FSM_A0BF 2 #define FSM_80BF80BF 3 #define FSM_809F 4 #define FSM_90BF 5 #define FSM_80BF80BF80BF 6 #define FSM_808F 7 #define FSM_ERROR 8 /* In the FSM it appears that categories 0xc0-0xc1 and 0xf5-0xff make the same transitions, as do categories 0xe1-0xec and 0xee-0xef. I wonder if there is any great benefit in combining categories? It would reduce the memory footprint of the transition table by 16 bytes, but might it be harder to understand? */ /* Machine transition table */ static const char machine [9][14] = { /* FSM_START */ {FSM_START, /* 0x00-0x7f */ FSM_ERROR, /* 0x80-0x8f */ FSM_ERROR, /* 0x90-0x9f */ FSM_ERROR, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_80BF, /* 0xc2-0xdf */ FSM_A0BF, /* 0xe0 */ FSM_80BF80BF, /* 0xe1-0xec */ FSM_809F, /* 0xed */ FSM_80BF80BF, /* 0xee-0xef */ FSM_90BF, /* 0xf0 */ FSM_80BF80BF80BF, /* 0xf1-0xf3 */ FSM_808F, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_80BF */ {FSM_ERROR, /* 0x00-0x7f */ FSM_START, /* 0x80-0x8f */ FSM_START, /* 0x90-0x9f */ FSM_START, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_A0BF */ {FSM_ERROR, /* 0x00-0x7f */ FSM_ERROR, /* 0x80-0x8f */ FSM_ERROR, /* 0x90-0x9f */ FSM_80BF, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_80BF80BF */ {FSM_ERROR, /* 0x00-0x7f */ FSM_80BF, /* 0x80-0x8f */ FSM_80BF, /* 0x90-0x9f */ FSM_80BF, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_809F */ {FSM_ERROR, /* 0x00-0x7f */ FSM_80BF, /* 0x80-0x8f */ FSM_80BF, /* 0x90-0x9f */ FSM_ERROR, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_90BF */ {FSM_ERROR, /* 0x00-0x7f */ FSM_ERROR, /* 0x80-0x8f */ FSM_80BF80BF, /* 0x90-0x9f */ FSM_80BF80BF, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_80BF80BF80BF */ {FSM_ERROR, /* 0x00-0x7f */ FSM_80BF80BF, /* 0x80-0x8f */ FSM_80BF80BF, /* 0x90-0x9f */ FSM_80BF80BF, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_808F */ {FSM_ERROR, /* 0x00-0x7f */ FSM_80BF80BF, /* 0x80-0x8f */ FSM_ERROR, /* 0x90-0x9f */ FSM_ERROR, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ /* FSM_ERROR */ {FSM_ERROR, /* 0x00-0x7f */ FSM_ERROR, /* 0x80-0x8f */ FSM_ERROR, /* 0x90-0x9f */ FSM_ERROR, /* 0xa0-0xbf */ FSM_ERROR, /* 0xc0-0xc1 */ FSM_ERROR, /* 0xc2-0xdf */ FSM_ERROR, /* 0xe0 */ FSM_ERROR, /* 0xe1-0xec */ FSM_ERROR, /* 0xed */ FSM_ERROR, /* 0xee-0xef */ FSM_ERROR, /* 0xf0 */ FSM_ERROR, /* 0xf1-0xf3 */ FSM_ERROR, /* 0xf4 */ FSM_ERROR}, /* 0xf5-0xff */ }; /* Scan MAX_LEN bytes in *DATA for chars that are not in the octet * category 0 (FSM_START). Return the position of the first such char * or DATA + MAX_LEN if all were cat 0. */ static const char * first_non_fsm_start_char(const char *data, apr_size_t max_len) { #if SVN_UNALIGNED_ACCESS_IS_OK /* Scan the input one machine word at a time. */ for (; max_len > sizeof(apr_uintptr_t) ; data += sizeof(apr_uintptr_t), max_len -= sizeof(apr_uintptr_t)) if (*(const apr_uintptr_t *)data & SVN__BIT_7_SET) break; #endif /* The remaining odd bytes will be examined the naive way: */ for (; max_len > 0; ++data, --max_len) if ((unsigned char)*data >= 0x80) break; return data; } const char * svn_utf__last_valid(const char *data, apr_size_t len) { const char *start = first_non_fsm_start_char(data, len); const char *end = data + len; int state = FSM_START; data = start; while (data < end) { unsigned char octet = *data++; int category = octet_category[octet]; state = machine[state][category]; if (state == FSM_START) start = data; } return start; } svn_boolean_t svn_utf__cstring_is_valid(const char *data) { if (!data) return FALSE; return svn_utf__is_valid(data, strlen(data)); } svn_boolean_t svn_utf__is_valid(const char *data, apr_size_t len) { const char *end = data + len; int state = FSM_START; if (!data) return FALSE; data = first_non_fsm_start_char(data, len); while (data < end) { unsigned char octet = *data++; int category = octet_category[octet]; state = machine[state][category]; } return state == FSM_START; } const char * svn_utf__last_valid2(const char *data, apr_size_t len) { const char *start = first_non_fsm_start_char(data, len); const char *end = data + len; int state = FSM_START; data = start; while (data < end) { unsigned char octet = *data++; switch (state) { case FSM_START: if (octet <= 0x7F) break; else if (octet <= 0xC1) state = FSM_ERROR; else if (octet <= 0xDF) state = FSM_80BF; else if (octet == 0xE0) state = FSM_A0BF; else if (octet <= 0xEC) state = FSM_80BF80BF; else if (octet == 0xED) state = FSM_809F; else if (octet <= 0xEF) state = FSM_80BF80BF; else if (octet == 0xF0) state = FSM_90BF; else if (octet <= 0xF3) state = FSM_80BF80BF80BF; else if (octet <= 0xF4) state = FSM_808F; else state = FSM_ERROR; break; case FSM_80BF: if (octet >= 0x80 && octet <= 0xBF) state = FSM_START; else state = FSM_ERROR; break; case FSM_A0BF: if (octet >= 0xA0 && octet <= 0xBF) state = FSM_80BF; else state = FSM_ERROR; break; case FSM_80BF80BF: if (octet >= 0x80 && octet <= 0xBF) state = FSM_80BF; else state = FSM_ERROR; break; case FSM_809F: if (octet >= 0x80 && octet <= 0x9F) state = FSM_80BF; else state = FSM_ERROR; break; case FSM_90BF: if (octet >= 0x90 && octet <= 0xBF) state = FSM_80BF80BF; else state = FSM_ERROR; break; case FSM_80BF80BF80BF: if (octet >= 0x80 && octet <= 0xBF) state = FSM_80BF80BF; else state = FSM_ERROR; break; case FSM_808F: if (octet >= 0x80 && octet <= 0x8F) state = FSM_80BF80BF; else state = FSM_ERROR; break; default: case FSM_ERROR: return start; } if (state == FSM_START) start = data; } return start; }
Upload File
Create Folder