| 1 | /* |
| 2 | * Copyright (c) 2007 Mans Rullgard |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with FFmpeg; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #ifndef AVUTIL_AVSTRING_H |
| 22 | #define AVUTIL_AVSTRING_H |
| 23 | |
| 24 | #include <stddef.h> |
| 25 | #include <stdint.h> |
| 26 | #include "attributes.h" |
| 27 | |
| 28 | /** |
| 29 | * @addtogroup lavu_string |
| 30 | * @{ |
| 31 | */ |
| 32 | |
| 33 | /** |
| 34 | * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to |
| 35 | * the address of the first character in str after the prefix. |
| 36 | * |
| 37 | * @param str input string |
| 38 | * @param pfx prefix to test |
| 39 | * @param ptr updated if the prefix is matched inside str |
| 40 | * @return non-zero if the prefix matches, zero otherwise |
| 41 | */ |
| 42 | int av_strstart(const char *str, const char *pfx, const char **ptr); |
| 43 | |
| 44 | /** |
| 45 | * Return non-zero if pfx is a prefix of str independent of case. If |
| 46 | * it is, *ptr is set to the address of the first character in str |
| 47 | * after the prefix. |
| 48 | * |
| 49 | * @param str input string |
| 50 | * @param pfx prefix to test |
| 51 | * @param ptr updated if the prefix is matched inside str |
| 52 | * @return non-zero if the prefix matches, zero otherwise |
| 53 | */ |
| 54 | int av_stristart(const char *str, const char *pfx, const char **ptr); |
| 55 | |
| 56 | /** |
| 57 | * Locate the first case-independent occurrence in the string haystack |
| 58 | * of the string needle. A zero-length string needle is considered to |
| 59 | * match at the start of haystack. |
| 60 | * |
| 61 | * This function is a case-insensitive version of the standard strstr(). |
| 62 | * |
| 63 | * @param haystack string to search in |
| 64 | * @param needle string to search for |
| 65 | * @return pointer to the located match within haystack |
| 66 | * or a null pointer if no match |
| 67 | */ |
| 68 | char *av_stristr(const char *haystack, const char *needle); |
| 69 | |
| 70 | /** |
| 71 | * Locate the first occurrence of the string needle in the string haystack |
| 72 | * where not more than hay_length characters are searched. A zero-length |
| 73 | * string needle is considered to match at the start of haystack. |
| 74 | * |
| 75 | * This function is a length-limited version of the standard strstr(). |
| 76 | * |
| 77 | * @param haystack string to search in |
| 78 | * @param needle string to search for |
| 79 | * @param hay_length length of string to search in |
| 80 | * @return pointer to the located match within haystack |
| 81 | * or a null pointer if no match |
| 82 | */ |
| 83 | char *av_strnstr(const char *haystack, const char *needle, size_t hay_length); |
| 84 | |
| 85 | /** |
| 86 | * Copy the string src to dst, but no more than size - 1 bytes, and |
| 87 | * null-terminate dst. |
| 88 | * |
| 89 | * This function is the same as BSD strlcpy(). |
| 90 | * |
| 91 | * @param dst destination buffer |
| 92 | * @param src source string |
| 93 | * @param size size of destination buffer |
| 94 | * @return the length of src |
| 95 | * |
| 96 | * @warning since the return value is the length of src, src absolutely |
| 97 | * _must_ be a properly 0-terminated string, otherwise this will read beyond |
| 98 | * the end of the buffer and possibly crash. |
| 99 | */ |
| 100 | size_t av_strlcpy(char *dst, const char *src, size_t size); |
| 101 | |
| 102 | /** |
| 103 | * Append the string src to the string dst, but to a total length of |
| 104 | * no more than size - 1 bytes, and null-terminate dst. |
| 105 | * |
| 106 | * This function is similar to BSD strlcat(), but differs when |
| 107 | * size <= strlen(dst). |
| 108 | * |
| 109 | * @param dst destination buffer |
| 110 | * @param src source string |
| 111 | * @param size size of destination buffer |
| 112 | * @return the total length of src and dst |
| 113 | * |
 * @warning since the return value uses the length of src and dst, these
 * absolutely _must_ be properly 0-terminated strings, otherwise this
 * will read beyond the end of the buffer and possibly crash.
| 117 | */ |
| 118 | size_t av_strlcat(char *dst, const char *src, size_t size); |
| 119 | |
| 120 | /** |
| 121 | * Append output to a string, according to a format. Never write out of |
| 122 | * the destination buffer, and always put a terminating 0 within |
| 123 | * the buffer. |
| 124 | * @param dst destination buffer (string to which the output is |
| 125 | * appended) |
| 126 | * @param size total size of the destination buffer |
| 127 | * @param fmt printf-compatible format string, specifying how the |
| 128 | * following parameters are used |
| 129 | * @return the length of the string that would have been generated |
| 130 | * if enough space had been available |
| 131 | */ |
| 132 | size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4); |
| 133 | |
/**
 * Count the leading non-zero chars of a string, examining at most len
 * of them.
 *
 * @param s   string to measure
 * @param len maximum number of characters to check in the string, which
 *            is also the largest value the function can return
 * @return the number of chars preceding the first zero char, or len if
 *         no zero char occurs within the first len chars
 */
static inline size_t av_strnlen(const char *s, size_t len)
{
    size_t count = 0;

    /* The bound is tested first, so s[count] is never read at or past len. */
    while (count < len && s[count] != '\0')
        count++;

    return count;
}
| 147 | |
| 148 | /** |
| 149 | * Print arguments following specified format into a large enough auto |
| 150 | * allocated buffer. It is similar to GNU asprintf(). |
| 151 | * @param fmt printf-compatible format string, specifying how the |
| 152 | * following parameters are used. |
| 153 | * @return the allocated string |
| 154 | * @note You have to free the string yourself with av_free(). |
| 155 | */ |
| 156 | char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2); |
| 157 | |
| 158 | /** |
 * Convert a number to an av_malloced string.
| 160 | */ |
| 161 | char *av_d2str(double d); |
| 162 | |
| 163 | /** |
| 164 | * Unescape the given string until a non escaped terminating char, |
| 165 | * and return the token corresponding to the unescaped string. |
| 166 | * |
| 167 | * The normal \ and ' escaping is supported. Leading and trailing |
| 168 | * whitespaces are removed, unless they are escaped with '\' or are |
| 169 | * enclosed between ''. |
| 170 | * |
| 171 | * @param buf the buffer to parse, buf will be updated to point to the |
| 172 | * terminating char |
| 173 | * @param term a 0-terminated list of terminating chars |
| 174 | * @return the malloced unescaped string, which must be av_freed by |
| 175 | * the user, NULL in case of allocation failure |
| 176 | */ |
| 177 | char *av_get_token(const char **buf, const char *term); |
| 178 | |
| 179 | /** |
| 180 | * Split the string into several tokens which can be accessed by |
| 181 | * successive calls to av_strtok(). |
| 182 | * |
| 183 | * A token is defined as a sequence of characters not belonging to the |
| 184 | * set specified in delim. |
| 185 | * |
| 186 | * On the first call to av_strtok(), s should point to the string to |
| 187 | * parse, and the value of saveptr is ignored. In subsequent calls, s |
| 188 | * should be NULL, and saveptr should be unchanged since the previous |
| 189 | * call. |
| 190 | * |
| 191 | * This function is similar to strtok_r() defined in POSIX.1. |
| 192 | * |
| 193 | * @param s the string to parse, may be NULL |
| 194 | * @param delim 0-terminated list of token delimiters, must be non-NULL |
| 195 | * @param saveptr user-provided pointer which points to stored |
| 196 | * information necessary for av_strtok() to continue scanning the same |
| 197 | * string. saveptr is updated to point to the next character after the |
| 198 | * first delimiter found, or to NULL if the string was terminated |
| 199 | * @return the found token, or NULL when no token is found |
| 200 | */ |
| 201 | char *av_strtok(char *s, const char *delim, char **saveptr); |
| 202 | |
| 203 | /** |
 * Locale-independent, ASCII-only counterpart of isdigit().
| 205 | */ |
| 206 | av_const int av_isdigit(int c); |
| 207 | |
| 208 | /** |
 * Locale-independent, ASCII-only counterpart of isgraph().
| 210 | */ |
| 211 | av_const int av_isgraph(int c); |
| 212 | |
| 213 | /** |
 * Locale-independent, ASCII-only counterpart of isspace().
| 215 | */ |
| 216 | av_const int av_isspace(int c); |
| 217 | |
| 218 | /** |
| 219 | * Locale-independent conversion of ASCII characters to uppercase. |
| 220 | */ |
| 221 | static inline av_const int av_toupper(int c) |
| 222 | { |
| 223 | if (c >= 'a' && c <= 'z') |
| 224 | c ^= 0x20; |
| 225 | return c; |
| 226 | } |
| 227 | |
| 228 | /** |
| 229 | * Locale-independent conversion of ASCII characters to lowercase. |
| 230 | */ |
| 231 | static inline av_const int av_tolower(int c) |
| 232 | { |
| 233 | if (c >= 'A' && c <= 'Z') |
| 234 | c ^= 0x20; |
| 235 | return c; |
| 236 | } |
| 237 | |
| 238 | /** |
 * Locale-independent, ASCII-only counterpart of isxdigit().
| 240 | */ |
| 241 | av_const int av_isxdigit(int c); |
| 242 | |
| 243 | /** |
| 244 | * Locale-independent case-insensitive compare. |
| 245 | * @note This means only ASCII-range characters are case-insensitive |
| 246 | */ |
| 247 | int av_strcasecmp(const char *a, const char *b); |
| 248 | |
| 249 | /** |
| 250 | * Locale-independent case-insensitive compare. |
| 251 | * @note This means only ASCII-range characters are case-insensitive |
| 252 | */ |
| 253 | int av_strncasecmp(const char *a, const char *b, size_t n); |
| 254 | |
| 255 | |
| 256 | /** |
| 257 | * Thread safe basename. |
| 258 | * @param path the path, on DOS both \ and / are considered separators. |
| 259 | * @return pointer to the basename substring. |
| 260 | */ |
| 261 | const char *av_basename(const char *path); |
| 262 | |
| 263 | /** |
| 264 | * Thread safe dirname. |
| 265 | * @param path the path, on DOS both \ and / are considered separators. |
| 266 | * @return the path with the separator replaced by the string terminator or ".". |
| 267 | * @note the function may change the input string. |
| 268 | */ |
| 269 | const char *av_dirname(char *path); |
| 270 | |
| 271 | /** |
| 272 | * Match instances of a name in a comma-separated list of names. |
| 273 | * @param name Name to look for. |
| 274 | * @param names List of names. |
| 275 | * @return 1 on match, 0 otherwise. |
| 276 | */ |
| 277 | int av_match_name(const char *name, const char *names); |
| 278 | |
/**
 * Escaping strategies accepted by the mode argument of av_escape().
 */
enum AVEscapeMode {
    AV_ESCAPE_MODE_AUTO      = 0, ///< Use auto-selected escaping mode.
    AV_ESCAPE_MODE_BACKSLASH = 1, ///< Use backslash escaping.
    AV_ESCAPE_MODE_QUOTE     = 2, ///< Use single-quote escaping.
};
| 284 | |
/**
 * Treat whitespace as special and escape it wherever it occurs, including
 * in the middle of the string.
 *
 * The effect is the same as adding the whitespace characters to the
 * special characters list, with the guarantee that the exact same list of
 * whitespace characters is used as in the rest of libavutil.
 */
#define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)

/**
 * Restrict escaping to the specified special characters.
 * When this flag is absent, any characters that may be considered special
 * by av_get_token(), such as the single quote, are escaped as well.
 */
#define AV_ESCAPE_FLAG_STRICT (1 << 1)
| 301 | |
| 302 | /** |
| 303 | * Escape string in src, and put the escaped string in an allocated |
| 304 | * string in *dst, which must be freed with av_free(). |
| 305 | * |
| 306 | * @param dst pointer where an allocated string is put |
| 307 | * @param src string to escape, must be non-NULL |
| 308 | * @param special_chars string containing the special characters which |
| 309 | * need to be escaped, can be NULL |
 * @param mode escape mode to employ, see the AV_ESCAPE_MODE_* values of
 * enum AVEscapeMode.
 * Any unknown value for mode will be considered equivalent to
 * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
 * notice.
 * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_* macros
| 315 | * @return the length of the allocated string, or a negative error code in case of error |
| 316 | * @see av_bprint_escape() |
| 317 | */ |
| 318 | int av_escape(char **dst, const char *src, const char *special_chars, |
| 319 | enum AVEscapeMode mode, int flags); |
| 320 | |
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

/**
 * Combination of all the AV_UTF8_FLAG_ACCEPT_* flags.
 *
 * The replacement list is parenthesized so that the macro acts as a single
 * value when placed next to operators that bind tighter than '|', as in
 * (flags & AV_UTF8_FLAG_ACCEPT_ALL) or ~AV_UTF8_FLAG_ACCEPT_ALL.
 */
#define AV_UTF8_FLAG_ACCEPT_ALL \
    (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | \
     AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS    | \
     AV_UTF8_FLAG_ACCEPT_SURROGATES)
| 328 | |
| 329 | /** |
| 330 | * Read and decode a single UTF-8 code point (character) from the |
| 331 | * buffer in *buf, and update *buf to point to the next byte to |
| 332 | * decode. |
| 333 | * |
| 334 | * In case of an invalid byte sequence, the pointer will be updated to |
| 335 | * the next byte after the invalid sequence and the function will |
| 336 | * return an error code. |
| 337 | * |
| 338 | * Depending on the specified flags, the function will also fail in |
| 339 | * case the decoded code point does not belong to a valid range. |
| 340 | * |
| 341 | * @note For speed-relevant code a carefully implemented use of |
| 342 | * GET_UTF8() may be preferred. |
| 343 | * |
| 344 | * @param codep pointer used to return the parsed code in case of success. |
| 345 | * The value in *codep is set even in case the range check fails. |
 * @param bufp pointer to the address of the first byte of the sequence
| 347 | * to decode, updated by the function to point to the |
| 348 | * byte next after the decoded sequence |
| 349 | * @param buf_end pointer to the end of the buffer, points to the next |
| 350 | * byte past the last in the buffer. This is used to |
| 351 | * avoid buffer overreads (in case of an unfinished |
| 352 | * UTF-8 sequence towards the end of the buffer). |
| 353 | * @param flags a collection of AV_UTF8_FLAG_* flags |
| 354 | * @return >= 0 in case a sequence was successfully read, a negative |
| 355 | * value in case of invalid sequence |
| 356 | */ |
| 357 | int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end, |
| 358 | unsigned int flags); |
| 359 | |
| 360 | /** |
| 361 | * Check if a name is in a list. |
 * @return 0 if not found, or the 1-based index where it has been found in the
 * list.
| 364 | */ |
| 365 | int av_match_list(const char *name, const char *list, char separator); |
| 366 | |
| 367 | /** |
| 368 | * @} |
| 369 | */ |
| 370 | |
| 371 | #endif /* AVUTIL_AVSTRING_H */ |