Menu

[r1604]: / framework / trunk / cppcms / encoding.h  Maximize  Restore  History

Download this file

132 lines (121 with data), 6.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2008-2010 Artyom Beilis (Tonkikh) <artyomtnk@yahoo.com>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef CPPCMS_ENCODING_H
#define CPPCMS_ENCODING_H
#include <string>
#include <map>
#include <locale>
#include <cppcms/defs.h>
#include <cppcms/config.h>
namespace cppcms {
///
/// \brief This namespace holds various functions for dealing with encodings
///
///
namespace encoding {
/// Note: all these functions assume that control characters that are invalid in HTML are illegal.
/// For example, NUL is a legal UTF-8 code point, but it is illegal in terms of HTML validity, thus
/// valid_utf8 would return false for it.
///
/// Check if the string in range [\a begin, \a end) is valid in the locale \a loc and does not
/// include HTML illegal characters. The number of code points is stored in \a count.
///
/// \return true if the string is valid, false otherwise.
///
bool CPPCMS_API valid(std::locale const &loc,char const *begin,char const *end,size_t &count);
///
/// Check if the string in range [\a begin, \a end) is valid UTF-8 and does not include
/// HTML illegal characters. The number of code points is stored in \a count.
///
/// \return true if the string is valid, false otherwise.
///
bool CPPCMS_API valid_utf8(char const *begin,char const *end,size_t &count);
///
/// Check if the string in range [\a begin, \a end) is valid in the encoding \a encoding
/// (given as a C string) and does not include HTML illegal characters.
/// The number of code points is stored in \a count.
///
/// \return true if the string is valid, false otherwise.
///
bool CPPCMS_API valid(char const *encoding,char const *begin,char const *end,size_t &count);
///
/// Check if the string in range [\a begin, \a end) is valid in the encoding \a encoding
/// (given as a std::string) and does not include HTML illegal characters.
/// The number of code points is stored in \a count.
///
/// \return true if the string is valid, false otherwise.
///
bool CPPCMS_API valid(std::string const &encoding,char const *begin,char const *end,size_t &count);
///
/// Returns true if ASCII is a strict subset of the encoding \a encoding, i.e. all non-ASCII
/// characters are encoded using bytes >= 0x80.
///
/// This is very important for XML or HTML parsing in order to prevent invalid detection of
/// HTML specific characters. So filters that work with encodings that are not ASCII compatible
/// should convert the text to UTF-8 and then convert it back.
///
/// The ASCII compatible encodings are the UTF-8, ISO-8859-*, windows-12* and koi encoding families.
///
///
bool CPPCMS_API is_ascii_compatible(std::string const &encoding);
///
/// Check if the text in range [\a begin, \a end) is valid in the encoding \a encoding.
///
/// If it is valid, returns true. Otherwise removes all invalid characters (if \a replace == 0)
/// or replaces them with \a replace, saves the result to \a output and returns false.
///
/// \note The replace functionality is not supported for all encodings, only for UTF-8, ISO-8859-*,
/// single byte windows-12XX and the koi family.
///
bool CPPCMS_API validate_or_filter( std::string const &encoding,
char const *begin,char const *end,
std::string &output,
char replace = 0);
///
/// Convert the string in range [\a begin, \a end) from the local 8 bit encoding of the locale \a loc
/// to UTF-8.
/// If illegal characters are found, the conversion is aborted and only the successfully converted
/// part is returned.
///
std::string CPPCMS_API to_utf8(std::locale const &loc,char const *begin,char const *end);
///
/// Convert the string in range [\a begin, \a end) from the local 8 bit encoding \a encoding to UTF-8.
/// If illegal characters are found, the conversion is aborted and only the successfully converted
/// part is returned.
///
std::string CPPCMS_API to_utf8(char const *encoding,char const *begin,char const *end);
///
/// Convert the string \a str from the local 8 bit encoding of the locale \a loc to UTF-8.
/// If illegal characters are found, the conversion is aborted and only the successfully converted
/// part is returned.
///
std::string CPPCMS_API to_utf8(std::locale const &loc,std::string const &str);
///
/// Convert the string \a str from the local 8 bit encoding \a encoding to UTF-8.
/// If illegal characters are found, the conversion is aborted and only the successfully converted
/// part is returned.
///
std::string CPPCMS_API to_utf8(char const *encoding,std::string const &str);
///
/// Convert the UTF-8 string in range [\a begin, \a end) to the local 8 bit encoding of the
/// locale \a loc.
/// If non-convertible characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
std::string CPPCMS_API from_utf8(std::locale const &loc,char const *begin,char const *end);
///
/// Convert the UTF-8 string in range [\a begin, \a end) to the local 8 bit encoding \a encoding.
/// If non-convertible characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
std::string CPPCMS_API from_utf8(char const *encoding,char const *begin,char const *end);
///
/// Convert the UTF-8 string \a str to the local 8 bit encoding of the locale \a loc.
/// If non-convertible characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
std::string CPPCMS_API from_utf8(std::locale const &loc,std::string const &str);
///
/// Convert the UTF-8 string \a str to the local 8 bit encoding \a encoding.
/// If non-convertible characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
std::string CPPCMS_API from_utf8(char const *encoding,std::string const &str);
} // encoding
} // cppcms
#endif
MongoDB Logo MongoDB