53
54 // converts a utf8 string to quoted ascii
55 static void as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen);
56
57 #ifndef PRODUCT
58 // converts a quoted ascii string to utf8 string. returns the original
59 // string unchanged if nothing needs to be done.
60 static const char* from_quoted_ascii(const char* quoted_ascii_string);
61 #endif
62
63 // decodes the current utf8 character, stores the result in value,
64 // and returns the end of the current utf8 character.
65 template<typename T> static char* next(const char* str, T* value);
66
67 // decodes the current utf8 character, gets the supplementary character instead of
68 // the surrogate pair when seeing a supplementary character in string,
69 // stores the result in value, and returns the end of the current utf8 character.
70 static char* next_character(const char* str, jint* value);
71
72 // Utility methods
73 static const jbyte* strrchr(const jbyte* base, int length, jbyte c);
74 static bool equal(const jbyte* base1, int length1, const jbyte* base2,int length2);
75 static bool is_supplementary_character(const unsigned char* str);
76 static jint get_supplementary_character(const unsigned char* str);
77
78 static bool is_legal_utf8(const unsigned char* buffer, int length,
79 bool version_leq_47);
80 };
81
82
83 // Low-level interface for UNICODE strings
84
85 // A unicode string represents a string in the UTF-16 format in which supplementary
86 // characters are represented by surrogate pairs. Index values refer to char code
87 // units, so a supplementary character uses two positions in a unicode string.
88
89 class UNICODE : AllStatic {
90 public:
91 // checks if the given unicode character can be encoded as latin1
92 static bool is_latin1(jchar c);
93
|
53
54 // converts a utf8 string to quoted ascii
55 static void as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen);
56
57 #ifndef PRODUCT
58 // converts a quoted ascii string to utf8 string. returns the original
59 // string unchanged if nothing needs to be done.
60 static const char* from_quoted_ascii(const char* quoted_ascii_string);
61 #endif
62
63 // decodes the current utf8 character, stores the result in value,
64 // and returns the end of the current utf8 character.
65 template<typename T> static char* next(const char* str, T* value);
66
67 // decodes the current utf8 character, gets the supplementary character instead of
68 // the surrogate pair when seeing a supplementary character in string,
69 // stores the result in value, and returns the end of the current utf8 character.
70 static char* next_character(const char* str, jint* value);
71
72 // Utility methods
73
74 // Returns NULL if 'c' it not found. This only works as long
75 // as 'c' is an ASCII character
76 static const jbyte* strrchr(const jbyte* base, int length, jbyte c) {
77 assert(length >= 0, "sanity check");
78 assert(c >= 0, "does not work for non-ASCII characters");
79 // Skip backwards in string until 'c' is found or end is reached
80 while(--length >= 0 && base[length] != c);
81 return (length < 0) ? NULL : &base[length];
82 }
83 static bool equal(const jbyte* base1, int length1, const jbyte* base2,int length2);
84 static bool is_supplementary_character(const unsigned char* str);
85 static jint get_supplementary_character(const unsigned char* str);
86
87 static bool is_legal_utf8(const unsigned char* buffer, int length,
88 bool version_leq_47);
89 };
90
91
92 // Low-level interface for UNICODE strings
93
94 // A unicode string represents a string in the UTF-16 format in which supplementary
95 // characters are represented by surrogate pairs. Index values refer to char code
96 // units, so a supplementary character uses two positions in a unicode string.
97
98 class UNICODE : AllStatic {
99 public:
100 // checks if the given unicode character can be encoded as latin1
101 static bool is_latin1(jchar c);
102
|