File Manager

003 File Manager

Current Path: /usr/src/contrib/subversion/subversion/libsvn_subr

usr / src / contrib / subversion / subversion / libsvn_subr /

📁 ..
📄 adler32.c(3.16 KB)
📄 atomic.c(7.23 KB)
📄 auth.c(32.4 KB)
📄 auth.h(6.1 KB)
📄 base64.c(19.18 KB)
📄 bit_array.c(6.26 KB)
📄 cache-inprocess.c(21.2 KB)
📄 cache-membuffer.c(116.43 KB)
📄 cache-memcache.c(17.35 KB)
📄 cache-null.c(4.35 KB)
📄 cache.c(10.22 KB)
📄 cache.h(4.03 KB)
📄 cache_config.c(6.01 KB)
📄 checksum.c(24.17 KB)
📄 checksum.h(2.32 KB)
📄 cmdline.c(58.99 KB)
📄 compat.c(5.28 KB)
📄 compress_lz4.c(4.51 KB)
📄 compress_zlib.c(6.85 KB)
📄 config.c(39.66 KB)
📄 config_auth.c(8.65 KB)
📄 config_file.c(70.25 KB)
📄 config_impl.h(6.15 KB)
📄 config_keys.inc(2.65 KB)
📄 config_win.c(9.61 KB)
📄 crypto.c(25.19 KB)
📄 crypto.h(5.43 KB)
📄 ctype.c(13.81 KB)
📄 date.c(12.86 KB)
📄 debug.c(3.77 KB)
📄 deprecated.c(64.3 KB)
📄 dirent_uri.c(80.17 KB)
📄 dirent_uri.h(1.43 KB)
📄 dso.c(3.74 KB)
📄 encode.c(2.69 KB)
📄 eol.c(2.79 KB)
📄 error.c(24.52 KB)
📄 errorcode.inc(4.27 KB)
📄 fnv1a.c(6.92 KB)
📄 fnv1a.h(3.04 KB)
📄 genctype.py(3.84 KB)
📄 gpg_agent.c(22.29 KB)
📄 hash.c(19.59 KB)
📄 internal_statements.h(1.87 KB)
📄 internal_statements.sql(1.58 KB)
📄 io.c(182.6 KB)
📄 iter.c(3.84 KB)
📄 libsvn_subr.pc.in(515 B)
📄 lock.c(1.69 KB)
📄 log.c(14 KB)
📁 lz4
📄 macos_keychain.c(9.7 KB)
📄 magic.c(5.25 KB)
📄 md5.c(1.8 KB)
📄 mergeinfo.c(89.24 KB)
📄 mutex.c(3.06 KB)
📄 nls.c(3.06 KB)
📄 object_pool.c(9.76 KB)
📄 opt.c(39.34 KB)
📄 opt.h(1.89 KB)
📄 packed_data.c(33.43 KB)
📄 path.c(35.99 KB)
📄 pool.c(4.35 KB)
📄 pools.h(1.45 KB)
📄 prefix_string.c(10.65 KB)
📄 prompt.c(28.82 KB)
📄 properties.c(13.59 KB)
📄 quoprint.c(8.96 KB)
📄 root_pools.c(3.15 KB)
📄 simple_providers.c(25.54 KB)
📄 skel.c(23.04 KB)
📄 sorts.c(15.9 KB)
📄 spillbuf.c(19.78 KB)
📄 sqlite.c(49.68 KB)
📄 sqlite3wrapper.c(3.16 KB)
📄 ssl_client_cert_providers.c(6.66 KB)
📄 ssl_client_cert_pw_providers.c(18.93 KB)
📄 ssl_server_trust_providers.c(7.77 KB)
📄 stream.c(65.69 KB)
📄 string.c(40.2 KB)
📄 subst.c(67.57 KB)
📄 sysinfo.c(43.67 KB)
📄 sysinfo.h(2.58 KB)
📄 target.c(11.58 KB)
📄 temp_serializer.c(14.02 KB)
📄 time.c(9.08 KB)
📄 token.c(2.58 KB)
📄 types.c(9.43 KB)
📄 user.c(2.66 KB)
📄 username_providers.c(9.2 KB)
📄 utf.c(40.38 KB)
📁 utf8proc
📄 utf8proc.c(20.71 KB)
📄 utf_validate.c(13.3 KB)
📄 utf_width.c(10.85 KB)
📄 validate.c(3.35 KB)
📄 version.c(9.89 KB)
📄 win32_crashrpt.c(25.06 KB)
📄 win32_crashrpt.h(1.35 KB)
📄 win32_crashrpt_dll.h(4 KB)
📄 win32_crypto.c(18.12 KB)
📄 win32_xlate.c(7.3 KB)
📄 win32_xlate.h(2.11 KB)
📄 x509.h(3.75 KB)
📄 x509info.c(9.53 KB)
📄 x509parse.c(35.77 KB)
📄 xml.c(20.01 KB)

Editing: utf_validate.c

/*
 * utf_validate.c:  Validate a UTF-8 string
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

/* Validate a UTF-8 string according to the rules in
 *
 *    Table 3-6. Well-Formed UTF-8 Bytes Sequences
 *
 * in
 *
 *    The Unicode Standard, Version 4.0
 *
 * which is available at
 *
 *    http://www.unicode.org/
 *
 * UTF-8 was originally defined in RFC-2279, Unicode's "well-formed UTF-8"
 * is a subset of that enconding.  The Unicode enconding prohibits things
 * like non-shortest encodings (some characters can be represented by more
 * than one multi-byte encoding) and the encodings for the surrogate code
 * points.  RFC-3629 superceeds RFC-2279 and adopts the same well-formed
 * rules as Unicode.  This is the ABNF in RFC-3629 that describes
 * well-formed UTF-8 rules:
 *
 *   UTF8-octets = *( UTF8-char )
 *   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
 *   UTF8-1      = %x00-7F
 *   UTF8-2      = %xC2-DF UTF8-tail
 *   UTF8-3      = %xE0 %xA0-BF UTF8-tail /
 *                 %xE1-EC 2( UTF8-tail ) /
 *                 %xED %x80-9F UTF8-tail /
 *                 %xEE-EF 2( UTF8-tail )
 *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) /
 *                 %xF1-F3 3( UTF8-tail ) /
 *                 %xF4 %x80-8F 2( UTF8-tail )
 *   UTF8-tail   = %x80-BF
 *
 */

#include "private/svn_utf_private.h"
#include "private/svn_eol_private.h"
#include "private/svn_dep_compat.h"

/* Lookup table to categorise each octet in the string. */
static const char octet_category[256] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0x00-0x7f */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, /* 0x80-0x8f */
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 0x90-0x9f */
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, /* 0xa0-0xbf */
  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
  4,  4,                                                         /* 0xc0-0xc1 */
          5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, /* 0xc2-0xdf */
  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
  6,                                                             /* 0xe0 */
      7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,             /* 0xe1-0xec */
                                                      8,         /* 0xed */
                                                          9,  9, /* 0xee-0xef */
  10,                                                            /* 0xf0 */
      11, 11, 11,                                                /* 0xf1-0xf3 */
                  12,                                            /* 0xf4 */
                      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 /* 0xf5-0xff */
};

/* Machine states */
#define FSM_START         0
#define FSM_80BF          1
#define FSM_A0BF          2
#define FSM_80BF80BF      3
#define FSM_809F          4
#define FSM_90BF          5
#define FSM_80BF80BF80BF  6
#define FSM_808F          7
#define FSM_ERROR         8

/* In the FSM it appears that categories 0xc0-0xc1 and 0xf5-0xff make the
   same transitions, as do categories 0xe1-0xec and 0xee-0xef.  I wonder if
   there is any great benefit in combining categories?  It would reduce the
   memory footprint of the transition table by 16 bytes, but might it be
   harder to understand?  */

/* Machine transition table */
static const char machine [9][14] = {
  /* FSM_START */
  {FSM_START,         /* 0x00-0x7f */
   FSM_ERROR,         /* 0x80-0x8f */
   FSM_ERROR,         /* 0x90-0x9f */
   FSM_ERROR,         /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_80BF,          /* 0xc2-0xdf */
   FSM_A0BF,          /* 0xe0 */
   FSM_80BF80BF,      /* 0xe1-0xec */
   FSM_809F,          /* 0xed */
   FSM_80BF80BF,      /* 0xee-0xef */
   FSM_90BF,          /* 0xf0 */
   FSM_80BF80BF80BF,  /* 0xf1-0xf3 */
   FSM_808F,          /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_80BF */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_START,         /* 0x80-0x8f */
   FSM_START,         /* 0x90-0x9f */
   FSM_START,         /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_A0BF */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_ERROR,         /* 0x80-0x8f */
   FSM_ERROR,         /* 0x90-0x9f */
   FSM_80BF,          /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_80BF80BF */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_80BF,          /* 0x80-0x8f */
   FSM_80BF,          /* 0x90-0x9f */
   FSM_80BF,          /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_809F */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_80BF,          /* 0x80-0x8f */
   FSM_80BF,          /* 0x90-0x9f */
   FSM_ERROR,         /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_90BF */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_ERROR,         /* 0x80-0x8f */
   FSM_80BF80BF,      /* 0x90-0x9f */
   FSM_80BF80BF,      /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_80BF80BF80BF */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_80BF80BF,      /* 0x80-0x8f */
   FSM_80BF80BF,      /* 0x90-0x9f */
   FSM_80BF80BF,      /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_808F */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_80BF80BF,      /* 0x80-0x8f */
   FSM_ERROR,         /* 0x90-0x9f */
   FSM_ERROR,         /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */

/* FSM_ERROR */
  {FSM_ERROR,         /* 0x00-0x7f */
   FSM_ERROR,         /* 0x80-0x8f */
   FSM_ERROR,         /* 0x90-0x9f */
   FSM_ERROR,         /* 0xa0-0xbf */
   FSM_ERROR,         /* 0xc0-0xc1 */
   FSM_ERROR,         /* 0xc2-0xdf */
   FSM_ERROR,         /* 0xe0 */
   FSM_ERROR,         /* 0xe1-0xec */
   FSM_ERROR,         /* 0xed */
   FSM_ERROR,         /* 0xee-0xef */
   FSM_ERROR,         /* 0xf0 */
   FSM_ERROR,         /* 0xf1-0xf3 */
   FSM_ERROR,         /* 0xf4 */
   FSM_ERROR},        /* 0xf5-0xff */
};

/* Scan MAX_LEN bytes in *DATA for chars that are not in the octet
 * category 0 (FSM_START).  Return the position of the first such char
 * or DATA + MAX_LEN if all were cat 0.
 */
static const char *
first_non_fsm_start_char(const char *data, apr_size_t max_len)
{
#if SVN_UNALIGNED_ACCESS_IS_OK

/* Scan the input one machine word at a time. */
  for (; max_len > sizeof(apr_uintptr_t)
       ; data += sizeof(apr_uintptr_t), max_len -= sizeof(apr_uintptr_t))
    if (*(const apr_uintptr_t *)data & SVN__BIT_7_SET)
      break;

#endif

/* The remaining odd bytes will be examined the naive way: */
  for (; max_len > 0; ++data, --max_len)
    if ((unsigned char)*data >= 0x80)
      break;

return data;
}

const char *
svn_utf__last_valid(const char *data, apr_size_t len)
{
  const char *start = first_non_fsm_start_char(data, len);
  const char *end = data + len;
  int state = FSM_START;

data = start;
  while (data < end)
    {
      unsigned char octet = *data++;
      int category = octet_category[octet];
      state = machine[state][category];
      if (state == FSM_START)
        start = data;
    }
  return start;
}

svn_boolean_t
svn_utf__cstring_is_valid(const char *data)
{
  if (!data)
    return FALSE;

return svn_utf__is_valid(data, strlen(data));
}

svn_boolean_t
svn_utf__is_valid(const char *data, apr_size_t len)
{
  const char *end = data + len;
  int state = FSM_START;

if (!data)
    return FALSE;

data = first_non_fsm_start_char(data, len);

while (data < end)
    {
      unsigned char octet = *data++;
      int category = octet_category[octet];
      state = machine[state][category];
    }
  return state == FSM_START;
}

const char *
svn_utf__last_valid2(const char *data, apr_size_t len)
{
  const char *start = first_non_fsm_start_char(data, len);
  const char *end = data + len;
  int state = FSM_START;

data = start;
  while (data < end)
    {
      unsigned char octet = *data++;
      switch (state)
        {
        case FSM_START:
          if (octet <= 0x7F)
            break;
          else if (octet <= 0xC1)
            state = FSM_ERROR;
          else if (octet <= 0xDF)
            state = FSM_80BF;
          else if (octet == 0xE0)
            state = FSM_A0BF;
          else if (octet <= 0xEC)
            state = FSM_80BF80BF;
          else if (octet == 0xED)
            state = FSM_809F;
          else if (octet <= 0xEF)
            state = FSM_80BF80BF;
          else if (octet == 0xF0)
            state = FSM_90BF;
          else if (octet <= 0xF3)
            state = FSM_80BF80BF80BF;
          else if (octet <= 0xF4)
            state = FSM_808F;
          else
            state = FSM_ERROR;
          break;
        case FSM_80BF:
          if (octet >= 0x80 && octet <= 0xBF)
            state = FSM_START;
          else
            state = FSM_ERROR;
          break;
        case FSM_A0BF:
          if (octet >= 0xA0 && octet <= 0xBF)
            state = FSM_80BF;
          else
            state = FSM_ERROR;
          break;
        case FSM_80BF80BF:
          if (octet >= 0x80 && octet <= 0xBF)
            state = FSM_80BF;
          else
            state = FSM_ERROR;
          break;
        case FSM_809F:
          if (octet >= 0x80 && octet <= 0x9F)
            state = FSM_80BF;
          else
            state = FSM_ERROR;
          break;
        case FSM_90BF:
          if (octet >= 0x90 && octet <= 0xBF)
            state = FSM_80BF80BF;
          else
            state = FSM_ERROR;
          break;
        case FSM_80BF80BF80BF:
          if (octet >= 0x80 && octet <= 0xBF)
            state = FSM_80BF80BF;
          else
            state = FSM_ERROR;
          break;
        case FSM_808F:
          if (octet >= 0x80 && octet <= 0x8F)
            state = FSM_80BF80BF;
          else
            state = FSM_ERROR;
          break;
        default:
        case FSM_ERROR:
          return start;
        }
      if (state == FSM_START)
        start = data;
    }
  return start;
}

003 File Manager

Editing: utf_validate.c

Upload File

Create Folder