master
1
2 'use strict';
3
4 /***
5 *
6 * @package Language
7 * @dependency string
8 * @description Detecting language by character block. Full-width <-> half-width character conversion. Hiragana and Katakana conversions.
9 *
10 ***/
11
12 /***
13 * String module
14 *
15 ***/
16
17
18 /***
19 * @method has[Script]()
20 * @returns Boolean
21 * @short Returns true if the string contains any characters in that script.
22 *
23 * @set
24 * hasArabic
25 * hasCyrillic
26 * hasGreek
27 * hasHangul
28 * hasHan
29 * hasKanji
30 * hasHebrew
31 * hasHiragana
32 * hasKana
33 * hasKatakana
34 * hasLatin
35 * hasThai
36 * hasDevanagari
37 *
38 * @example
39 *
40 * 'أتكلم'.hasArabic() -> true
41 * 'визит'.hasCyrillic() -> true
42 * '잘 먹겠습니다!'.hasHangul() -> true
43 * 'ミックスです'.hasKatakana() -> true
44 * "l'année".hasLatin() -> true
45 *
46 ***
47 * @method is[Script]()
48 * @returns Boolean
49 * @short Returns true if the string contains only characters in that script. Whitespace is ignored.
50 *
51 * @set
52 * isArabic
53 * isCyrillic
54 * isGreek
55 * isHangul
56 * isHan
57 * isKanji
58 * isHebrew
59 * isHiragana
60 * isKana
61 * isKatakana
 * isLatin
63 * isThai
64 * isDevanagari
65 *
66 * @example
67 *
68 * 'أتكلم'.isArabic() -> true
69 * 'визит'.isCyrillic() -> true
 * '잘 먹겠습니다'.isHangul() -> true
71 * 'ミックスです'.isKatakana() -> false
72 * "l'année".isLatin() -> true
73 *
74 ***/
// Table of scripts used to generate the is[Script]() / has[Script]() methods.
// `names` lists every method suffix the entry is published under and `source`
// is a run of character ranges meant to be placed inside a RegExp [...] class.
var unicodeScripts = [
  {
    // U+0600-U+06FF: Arabic
    names: ['Arabic'],
    source: '\u0600-\u06FF'
  },
  {
    // U+0400-U+04FF: Cyrillic
    names: ['Cyrillic'],
    source: '\u0400-\u04FF'
  },
  {
    // U+0900-U+097F: Devanagari
    names: ['Devanagari'],
    source: '\u0900-\u097F'
  },
  {
    // U+0370-U+03FF: Greek and Coptic
    names: ['Greek'],
    source: '\u0370-\u03FF'
  },
  {
    // U+AC00-U+D7AF: Hangul syllables, U+1100-U+11FF: Hangul Jamo
    names: ['Hangul'],
    source: '\uAC00-\uD7AF\u1100-\u11FF'
  },
  {
    // U+4E00-U+9FFF: CJK unified ideographs, U+F900-U+FAFF: compatibility ideographs.
    // Published under both names, so isHan/isKanji and hasHan/hasKanji are equivalent.
    names: ['Han','Kanji'],
    source: '\u4E00-\u9FFF\uF900-\uFAFF'
  },
  {
    // U+0590-U+05FF: Hebrew
    names: ['Hebrew'],
    source: '\u0590-\u05FF'
  },
  {
    // U+3040-U+309F: Hiragana, plus U+30FB-U+30FC (middle dot, prolonged sound mark)
    names: ['Hiragana'],
    source: '\u3040-\u309F\u30FB-\u30FC'
  },
  {
    // U+3040-U+30FF: Hiragana and Katakana, U+FF61-U+FF9F: half-width forms
    names: ['Kana'],
    source: '\u3040-\u30FF\uFF61-\uFF9F'
  },
  {
    // U+30A0-U+30FF: Katakana, U+FF61-U+FF9F: half-width forms
    names: ['Katakana'],
    source: '\u30A0-\u30FF\uFF61-\uFF9F'
  },
  {
    // U+0001-U+024F: Basic Latin through Latin Extended-B (everything except NUL)
    names: ['Latin'],
    source: '\u0001-\u007F\u0080-\u00FF\u0100-\u017F\u0180-\u024F'
  },
  {
    // U+0E00-U+0E7F: Thai
    names: ['Thai'],
    source: '\u0E00-\u0E7F'
  }
];
89
// Installs an is[Name]() and a has[Name]() method on `string.prototype` for
// every name of every entry in `unicodeScripts`.
//   is[Name]  - true when the trimmed string consists only of characters from
//               the script's ranges (whitespace is also accepted in between).
//   has[Name] - true when at least one character falls in the script's ranges.
function buildUnicodeScripts() {

  // One call per script so that both patterns are compiled once and shared by
  // all of that script's aliases.
  function installScript(script) {
    var charClass = script.source;
    var onlyPattern = regexp('^[' + charClass + '\\s]+$');
    var anyPattern = regexp('[' + charClass + ']');
    var aliases = script.names;
    for (var j = 0; j < aliases.length; j++) {
      defineProperty(string.prototype, 'is' + aliases[j], function() {
        return onlyPattern.test(this.trim());
      });
      defineProperty(string.prototype, 'has' + aliases[j], function() {
        return anyPattern.test(this);
      });
    }
  }

  for (var i = 0; i < unicodeScripts.length; i++) {
    installScript(unicodeScripts[i]);
  }
}
100
101 // Support for converting character widths and katakana to hiragana.
102
// Distance between an ASCII character code and the code of its full-width
// counterpart (0x41 + 0xFEE0 = 0xFF21).
var HALF_WIDTH_TO_FULL_WIDTH_TRAVERSAL = 0xFEE0;

// Inclusive ASCII code ranges that have a full-width counterpart at the fixed
// offset above. `type` is the conversion-mode letter the range belongs to:
// 'a' alphabet, 'n' numbers, 'p' punctuation.
var widthConversionRanges = [
  { type: 'a', start: 65,  end: 90  }, // A-Z
  { type: 'a', start: 97,  end: 122 }, // a-z
  { type: 'n', start: 48,  end: 57  }, // 0-9
  { type: 'p', start: 33,  end: 47  }, // ! through /
  { type: 'p', start: 58,  end: 64  }, // : through @
  { type: 'p', start: 91,  end: 96  }, // [ through `
  { type: 'p', start: 123, end: 126 }  // { through ~
];
114
// Lazily built lookup, filled in by buildWidthConversionTables():
//   WidthConversionTable['zenkaku'][halfWidthChar] -> { type: <mode letter>, to: <full-width char> }
//   WidthConversionTable['hankaku'][fullWidthChar] -> { type: <mode letter>, to: <half-width char> }
var WidthConversionTable;

// Every half-width character, and every full-width character that has a
// compatibility mapping, is written as a \u escape in this section. Literal
// characters are folded onto their counterparts by Unicode compatibility
// normalisation (editors, copy/paste, transcoding tools), which silently makes
// the "half" and "full" strings identical and turns the whole table into
// identity mappings.

// Anything that may be half-width: U+0020-U+00A5, or a half-width kana/punctuation
// character optionally followed by the half-width (semi-)voiced sound mark
// (U+FF9E / U+FF9F), so that the pair is looked up as a single table key.
var allHankaku = /[\u0020-\u00A5]|[\uFF61-\uFF9F][\uFF9E\uFF9F]?/g;
// Anything that may be full-width. The table lookup decides what is converted.
var allZenkaku = /[\u3000-\u301C]|[\u301A-\u30FC]|[\uFF01-\uFF60]|[\uFFE0-\uFFE6]/g;

// Index-aligned punctuation pairs:
// ideographic full stop, ideographic comma, left/right corner bracket, yen, cent, pound.
var hankakuPunctuation = '\uFF61\uFF64\uFF62\uFF63\u00A5\u00A2\u00A3';
var zenkakuPunctuation = '\u3002\u3001\u300C\u300D\uFFE5\uFFE0\uFFE1';

// Full-width katakana whose voiced form is the next code point (+1) ...
var voicedKatakana = /[カキクケコサシスセソタチツテトハヒフヘホ]/;
// ... and those whose semi-voiced form is two code points further (+2).
// NOTE(review): U+30F2 (wo) has no semi-voiced form; with +2 it lands on U+30F4 (vu).
// Kept as-is to preserve existing behaviour - confirm whether this is intended.
var semiVoicedKatakana = /[ハヒフヘホヲ]/;

// Index-aligned katakana pairs: hankakuKatakana.charAt(i) is the half-width
// form of zenkakuKatakana.charAt(i). Both strings are split into the same rows.
var hankakuKatakana =
  '\uFF71\uFF72\uFF73\uFF74\uFF75' +       // a i u e o
  '\uFF67\uFF68\uFF69\uFF6A\uFF6B' +       // small a i u e o
  '\uFF76\uFF77\uFF78\uFF79\uFF7A' +       // ka ki ku ke ko
  '\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F' +       // sa shi su se so
  '\uFF80\uFF81\uFF82\uFF6F\uFF83\uFF84' + // ta chi tsu (small tsu) te to
  '\uFF85\uFF86\uFF87\uFF88\uFF89' +       // na ni nu ne no
  '\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E' +       // ha hi fu he ho
  '\uFF8F\uFF90\uFF91\uFF92\uFF93' +       // ma mi mu me mo
  '\uFF94\uFF6C\uFF95\uFF6D\uFF96\uFF6E' + // ya (small ya) yu (small yu) yo (small yo)
  '\uFF97\uFF98\uFF99\uFF9A\uFF9B' +       // ra ri ru re ro
  '\uFF9C\uFF66\uFF9D' +                   // wa wo n
  '\uFF70\uFF65';                          // prolonged sound mark, middle dot
var zenkakuKatakana =
  'アイウエオ' +
  'ァィゥェォ' +
  'カキクケコ' +
  'サシスセソ' +
  'タチツッテト' +
  'ナニヌネノ' +
  'ハヒフヘホ' +
  'マミムメモ' +
  'ヤャユュヨョ' +
  'ラリルレロ' +
  'ワヲン' +
  'ー・';

// Shared implementation of hankaku() and zenkaku().
//   str  - the string to convert
//   args - the caller's mode arguments; joined into one string of mode letters
//          (multiArgs is defined elsewhere - presumably it returns them as an array)
//   reg  - global pattern selecting the candidate characters (allZenkaku / allHankaku)
//   type - 'hankaku' or 'zenkaku': which direction of WidthConversionTable to use
// Returns `str` with every matched character that has a table entry of an
// enabled type replaced by its counterpart; everything else is left untouched.
function convertCharacterWidth(str, args, reg, type) {
  if(!WidthConversionTable) {
    buildWidthConversionTables();
  }
  var mode = multiArgs(args).join(''), table = WidthConversionTable[type];
  // Reduce long mode names to their first letter ("alphabet" -> "a", "numbers" -> "n", ...)
  // and drop "all", so an empty mode means "convert everything".
  mode = mode.replace(/all/, '').replace(/(\w)lphabet|umbers?|atakana|paces?|unctuation/g, '$1');
  return str.replace(reg, function(c) {
    if(table[c] && (!mode || mode.has(table[c].type))) {
      return table[c].to;
    } else {
      return c;
    }
  });
}

// Populates WidthConversionTable in both directions. Called on first use only.
function buildWidthConversionTables() {
  var hankaku;
  WidthConversionTable = {
    'zenkaku': {},
    'hankaku': {}
  };
  // ASCII letters, digits and punctuation sit at a fixed offset from their full-width forms.
  widthConversionRanges.forEach(function(r) {
    simpleRepeat(r.end - r.start + 1, function(n) {
      n += r.start;
      setWidthConversion(r.type, chr(n), chr(n + HALF_WIDTH_TO_FULL_WIDTH_TRAVERSAL));
    });
  });
  // Katakana: a half-width kana followed by a half-width sound mark is a single
  // full-width code point, so those two-character sequences get their own keys.
  zenkakuKatakana.each(function(c, i) {
    hankaku = hankakuKatakana.charAt(i);
    setWidthConversion('k', hankaku, c);
    if(c.match(voicedKatakana)) {
      setWidthConversion('k', hankaku + '\uFF9E', c.shift(1));
    }
    if(c.match(semiVoicedKatakana)) {
      setWidthConversion('k', hankaku + '\uFF9F', c.shift(2));
    }
  });
  zenkakuPunctuation.each(function(c, i) {
    setWidthConversion('p', hankakuPunctuation.charAt(i), c);
  });
  // Voiced forms that are not one code point after their base character. These
  // must stay after the loop above: the first one replaces the reverse mapping
  // for U+30F4 that the semi-voiced rule created.
  setWidthConversion('k', '\uFF73\uFF9E', '\u30F4'); // u + voiced mark -> vu
  setWidthConversion('k', '\uFF66\uFF9E', '\u30FA'); // wo + voiced mark -> vo
  // ASCII space <-> ideographic space.
  setWidthConversion('s', ' ', '\u3000');
}
169
// Registers one half-width/full-width pair in WidthConversionTable, in both
// directions, tagged with the conversion-mode letter `type`.
function setWidthConversion(type, half, full) {
  var halfToFull = WidthConversionTable.zenkaku;
  var fullToHalf = WidthConversionTable.hankaku;
  halfToFull[half] = { type: type, to: full };
  fullToHalf[full] = { type: type, to: half };
}
174
175
extend(string, true, true, {

  /***
   * @method hankaku([mode] = 'all')
   * @returns String
   * @short Converts full-width characters (zenkaku) to half-width (hankaku).
   * @extra [mode] accepts any combination of "a" (alphabet), "n" (numbers), "k" (katakana), "s" (spaces), "p" (punctuation), or "all".
   * @example
   *
   *   'タロウ　ＹＡＭＡＤＡです！'.hankaku()                      -> 'ﾀﾛｳ YAMADAです!'
   *   'タロウ　ＹＡＭＡＤＡです！'.hankaku('a')                   -> 'タロウ　YAMADAです！'
   *   'タロウ　ＹＡＭＡＤＡです！'.hankaku('alphabet')            -> 'タロウ　YAMADAです！'
   *   'タロウです！　２５歳です！'.hankaku('katakana', 'numbers') -> 'ﾀﾛｳです！　25歳です！'
   *   'タロウです！　２５歳です！'.hankaku('k', 'n')              -> 'ﾀﾛｳです！　25歳です！'
   *   'タロウです！　２５歳です！'.hankaku('kn')                  -> 'ﾀﾛｳです！　25歳です！'
   *   'タロウです！　２５歳です！'.hankaku('sp')                  -> 'タロウです! ２５歳です!'
   *
   ***/
  hankaku: function() {
    var candidates = allZenkaku;
    return convertCharacterWidth(this, arguments, candidates, 'hankaku');
  },

  /***
   * @method zenkaku([mode] = 'all')
   * @returns String
   * @short Converts half-width characters (hankaku) to full-width (zenkaku).
   * @extra [mode] accepts any combination of "a" (alphabet), "n" (numbers), "k" (katakana), "s" (spaces), "p" (punctuation), or "all".
   * @example
   *
   *   'ﾀﾛｳ YAMADAです!'.zenkaku()                         -> 'タロウ　ＹＡＭＡＤＡです！'
   *   'ﾀﾛｳ YAMADAです!'.zenkaku('a')                      -> 'ﾀﾛｳ ＹＡＭＡＤＡです!'
   *   'ﾀﾛｳ YAMADAです!'.zenkaku('alphabet')               -> 'ﾀﾛｳ ＹＡＭＡＤＡです!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('katakana', 'numbers') -> 'タロウです! ２５歳です!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('k', 'n')              -> 'タロウです! ２５歳です!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('kn')                  -> 'タロウです! ２５歳です!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('sp')                  -> 'ﾀﾛｳです！　25歳です！'
   *
   ***/
  zenkaku: function() {
    var candidates = allHankaku;
    return convertCharacterWidth(this, arguments, candidates, 'zenkaku');
  },

  /***
   * @method hiragana([all] = true)
   * @returns String
   * @short Converts katakana into hiragana.
   * @extra If [all] is false, only full-width katakana will be converted.
   * @example
   *
   *   'カタカナ'.hiragana()   -> 'かたかな'
   *   'コンニチハ'.hiragana() -> 'こんにちは'
   *   'ｶﾀｶﾅ'.hiragana()       -> 'かたかな'
   *   'ｶﾀｶﾅ'.hiragana(false)  -> 'ｶﾀｶﾅ'
   *
   ***/
  hiragana: function(all) {
    // Unless explicitly disabled, widen half-width katakana first so that the
    // full-width pattern below catches it as well.
    var source = all === false ? this : this.zenkaku('k');
    // U+30A1-U+30F6 sits exactly 0x60 above the matching hiragana.
    return source.replace(/[\u30A1-\u30F6]/g, function(kana) {
      return kana.shift(-0x60);
    });
  },

  /***
   * @method katakana()
   * @returns String
   * @short Converts hiragana into katakana.
   * @example
   *
   *   'かたかな'.katakana()   -> 'カタカナ'
   *   'こんにちは'.katakana() -> 'コンニチハ'
   *
   ***/
  katakana: function() {
    // U+3041-U+3096 sits exactly 0x60 below the matching katakana.
    return this.replace(/[\u3041-\u3096]/g, function(kana) {
      return kana.shift(0x60);
    });
  }

});
259
// Install the is[Script]() / has[Script]() methods as soon as the package is loaded.
buildUnicodeScripts();
261