master
Raw Download raw file
  1
  2  'use strict';
  3
  4  /***
  5   *
  6   * @package Language
  7   * @dependency string
  8   * @description Detecting language by character block. Full-width <-> half-width character conversion. Hiragana and Katakana conversions.
  9   *
 10   ***/
 11
 12  /***
 13   * String module
 14   *
 15   ***/
 16
 17
 18  /***
 19   * @method has[Script]()
 20   * @returns Boolean
 21   * @short Returns true if the string contains any characters in that script.
 22   *
 23   * @set
 24   *   hasArabic
 25   *   hasCyrillic
 26   *   hasGreek
 27   *   hasHangul
 28   *   hasHan
 29   *   hasKanji
 30   *   hasHebrew
 31   *   hasHiragana
 32   *   hasKana
 33   *   hasKatakana
 34   *   hasLatin
 35   *   hasThai
 36   *   hasDevanagari
 37   *
 38   * @example
 39   *
 40   *   'أتكلم'.hasArabic()          -> true
 41   *   'визит'.hasCyrillic()        -> true
 42   *   '잘 먹겠습니다!'.hasHangul() -> true
 43   *   'ミックスです'.hasKatakana() -> true
 44   *   "l'année".hasLatin()         -> true
 45   *
 46   ***
 47   * @method is[Script]()
 48   * @returns Boolean
 49   * @short Returns true if the string contains only characters in that script. Whitespace is ignored.
 50   *
 51   * @set
 52   *   isArabic
 53   *   isCyrillic
 54   *   isGreek
 55   *   isHangul
 56   *   isHan
 57   *   isKanji
 58   *   isHebrew
 59   *   isHiragana
 60   *   isKana
 61   *   isKatakana
 62   *   isLatin
 63   *   isThai
 64   *   isDevanagari
 65   *
 66   * @example
 67   *
 68   *   'أتكلم'.isArabic()          -> true
 69   *   'визит'.isCyrillic()        -> true
 70   *   '잘 먹겠습니다'.isHangul()  -> true
 71   *   'ミックスです'.isKatakana() -> false
 72   *   "l'année".isLatin()         -> true
 73   *
 74   ***/
 75  var unicodeScripts = [
 76    { names: ['Arabic'],      source: '\u0600-\u06FF' },
 77    { names: ['Cyrillic'],    source: '\u0400-\u04FF' },
 78    { names: ['Devanagari'],  source: '\u0900-\u097F' },
 79    { names: ['Greek'],       source: '\u0370-\u03FF' },
 80    { names: ['Hangul'],      source: '\uAC00-\uD7AF\u1100-\u11FF' },
 81    { names: ['Han','Kanji'], source: '\u4E00-\u9FFF\uF900-\uFAFF' },
 82    { names: ['Hebrew'],      source: '\u0590-\u05FF' },
 83    { names: ['Hiragana'],    source: '\u3040-\u309F\u30FB-\u30FC' },
 84    { names: ['Kana'],        source: '\u3040-\u30FF\uFF61-\uFF9F' },
 85    { names: ['Katakana'],    source: '\u30A0-\u30FF\uFF61-\uFF9F' },
 86    { names: ['Latin'],       source: '\u0001-\u007F\u0080-\u00FF\u0100-\u017F\u0180-\u024F' },
 87    { names: ['Thai'],        source: '\u0E00-\u0E7F' }
 88  ];
 89
 90  function buildUnicodeScripts() {
 91    unicodeScripts.forEach(function(s) {
 92      var is = regexp('^['+s.source+'\\s]+$');
 93      var has = regexp('['+s.source+']');
 94      s.names.forEach(function(name) {
 95        defineProperty(string.prototype, 'is' + name, function() { return is.test(this.trim()); });
 96        defineProperty(string.prototype, 'has' + name, function() { return has.test(this); });
 97      });
 98    });
 99  }
100
// Support for converting character widths and katakana to hiragana.
//
// Every half-width (hankaku) code point below, and every full-width form that
// has a compatibility decomposition, is spelled with a \u escape. Written as
// literal characters they are folded into their counterparts whenever this
// file passes through Unicode compatibility normalization (NFKC), which makes
// both sides of each pair identical and turns the conversions into no-ops.

// Distance between an ASCII character and its full-width form
// ('A' U+0041 -> U+FF21).
var HALF_WIDTH_TO_FULL_WIDTH_TRAVERSAL = 65248;

// Inclusive ASCII code ranges that have a full-width form at the offset above.
// `type` is the mode letter that selects the range: a(lphabet), n(umbers),
// p(unctuation).
var widthConversionRanges = [
  { type: 'a', start: 65,  end: 90  }, // A-Z
  { type: 'a', start: 97,  end: 122 }, // a-z
  { type: 'n', start: 48,  end: 57  }, // 0-9
  { type: 'p', start: 33,  end: 47  }, // ! through /
  { type: 'p', start: 58,  end: 64  }, // : through @
  { type: 'p', start: 91,  end: 96  }, // [ through `
  { type: 'p', start: 123, end: 126 }  // { through ~
];

// Built lazily on first conversion; holds the 'zenkaku' and 'hankaku' tables.
var WidthConversionTable;

// Candidates for half -> full conversion. A half-width kana may be followed by
// a half-width voiced (U+FF9E) or semi-voiced (U+FF9F) mark; the pair is
// matched as one unit so it can map to a single full-width character.
var allHankaku   = /[\u0020-\u00A5]|[\uFF61-\uFF9F][\uFF9E\uFF9F]?/g;

// Candidates for full -> half conversion.
var allZenkaku   = /[\u3000-\u301C]|[\u301A-\u30FC]|[\uFF01-\uFF60]|[\uFFE0-\uFFE6]/g;

// Position-aligned punctuation pairs: full stop, comma, corner brackets,
// yen, cent, pound.
var hankakuPunctuation  = '\uFF61\uFF64\uFF62\uFF63\u00A5\u00A2\u00A3';
var zenkakuPunctuation  = '\u3002\u3001\u300C\u300D\uFFE5\uFFE0\uFFE1';

// Full-width katakana whose voiced form is the next code point.
var voicedKatakana      = /[カキクケコサシスセソタチツテトハヒフヘホ]/;

// Full-width katakana whose semi-voiced form is two code points ahead.
// NOTE(review): ヲ has no semi-voiced form, yet it is listed here; with the
// shift(2) applied by buildWidthConversionTables() this registers
// U+FF66 U+FF9F -> U+30F4. Kept as-is; confirm whether that is intended.
var semiVoicedKatakana  = /[ハヒフヘホヲ]/;

// Position-aligned katakana pairs: hankakuKatakana.charAt(i) is the half-width
// form of zenkakuKatakana.charAt(i).
var hankakuKatakana     = '\uFF71\uFF72\uFF73\uFF74\uFF75' +       // a i u e o
                          '\uFF67\uFF68\uFF69\uFF6A\uFF6B' +       // small a i u e o
                          '\uFF76\uFF77\uFF78\uFF79\uFF7A' +       // ka ki ku ke ko
                          '\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F' +       // sa shi su se so
                          '\uFF80\uFF81\uFF82\uFF6F\uFF83\uFF84' + // ta chi tsu (small tsu) te to
                          '\uFF85\uFF86\uFF87\uFF88\uFF89' +       // na ni nu ne no
                          '\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E' +       // ha hi fu he ho
                          '\uFF8F\uFF90\uFF91\uFF92\uFF93' +       // ma mi mu me mo
                          '\uFF94\uFF6C\uFF95\uFF6D\uFF96\uFF6E' + // ya (small ya) yu (small yu) yo (small yo)
                          '\uFF97\uFF98\uFF99\uFF9A\uFF9B' +       // ra ri ru re ro
                          '\uFF9C\uFF66\uFF9D' +                   // wa wo n
                          '\uFF70\uFF65';                          // long-vowel mark, middle dot
var zenkakuKatakana     = 'アイウエオァィゥェォカキクケコサシスセソタチツッテトナニヌネノハヒフヘホマミムメモヤャユュヨョラリルレロワヲンー・';
124
// Shared implementation of String#hankaku and String#zenkaku.
//
//   str  - the string to convert.
//   args - the caller's `arguments`; its members are joined into one mode
//          string, so 'k', 'n' and 'kn' are equivalent.
//   reg  - global regex selecting the characters eligible for conversion.
//   type - which table to use: 'hankaku' or 'zenkaku'.
//
// Returns `str` with every eligible character that has a table entry of a
// requested type replaced by its counterpart. An empty mode (no arguments, or
// "all") converts every type.
function convertCharacterWidth(str, args, reg, type) {
  var requested, mode, table;

  // The tables are built lazily, on the first conversion.
  if(!WidthConversionTable) {
    buildWidthConversionTables();
  }

  requested = multiArgs(args).join('');
  table = WidthConversionTable[type];

  // Reduce word forms to their type letters. Only the first alternative
  // captures, so `$1` restores the "a" of "alphabet" and is empty for the
  // rest, which simply lose their tails: "numbers" -> "n", "katakana" -> "k",
  // "spaces" -> "s", "punctuation" -> "p".
  mode = requested
    .replace(/all/, '')
    .replace(/(\w)lphabet|umbers?|atakana|paces?|unctuation/g, '$1');

  function convert(c) {
    var entry = table[c];
    if(!entry) {
      return c;
    }
    if(mode && !mode.has(entry.type)) {
      return c;
    }
    return entry.to;
  }

  return str.replace(reg, convert);
}
139
// Populates WidthConversionTable with the half <-> full width mappings:
//
//   1. ASCII letters, digits and punctuation from `widthConversionRanges`,
//      whose full-width form sits at a fixed code point offset.
//   2. Katakana, pairing `hankakuKatakana` and `zenkakuKatakana` by position.
//      A half-width kana followed by a half-width voiced (U+FF9E) or
//      semi-voiced (U+FF9F) mark maps to the single full-width character one
//      or two code points after the base kana.
//   3. Japanese punctuation, paired by position.
//   4. Three pairs that fit none of the patterns above.
//
// Half-width and full-width code points are written as \u escapes: as literal
// characters they are folded together by Unicode compatibility normalization
// (NFKC), which would register same-to-same mappings here.
function buildWidthConversionTables() {
  var hankaku;
  WidthConversionTable = {
    'zenkaku': {},
    'hankaku': {}
  };
  widthConversionRanges.forEach(function(r) {
    simpleRepeat(r.end - r.start + 1, function(n) {
      // `n` counts from 0; rebase it onto the range.
      n += r.start;
      setWidthConversion(r.type, chr(n), chr(n + HALF_WIDTH_TO_FULL_WIDTH_TRAVERSAL));
    });
  });
  zenkakuKatakana.each(function(c, i) {
    hankaku = hankakuKatakana.charAt(i);
    setWidthConversion('k', hankaku, c);
    // shift(n) is assumed to offset the character's code point by n, as in
    // the hiragana/katakana conversions elsewhere in this file.
    if(c.match(voicedKatakana)) {
      setWidthConversion('k', hankaku + '\uFF9E', c.shift(1));
    }
    if(c.match(semiVoicedKatakana)) {
      setWidthConversion('k', hankaku + '\uFF9F', c.shift(2));
    }
  });
  zenkakuPunctuation.each(function(c, i) {
    setWidthConversion('p', hankakuPunctuation.charAt(i), c);
  });
  // Registered last so they take precedence over anything derived above.
  setWidthConversion('k', '\uFF73\uFF9E', '\u30F4'); // u  + voiced mark <-> vu
  setWidthConversion('k', '\uFF66\uFF9E', '\u30FA'); // wo + voiced mark <-> vo
  setWidthConversion('s', ' ', '\u3000');            // space <-> ideographic space
}
169
// Registers one half-width/full-width pair in both directions.
//
//   type - mode letter ('a', 'n', 'k', 'p' or 's') used to filter conversions.
//   half - the half-width form; key of the 'zenkaku' (to full-width) table.
//   full - the full-width form; key of the 'hankaku' (to half-width) table.
//
// A later call with the same key overwrites the earlier entry.
function setWidthConversion(type, half, full) {
  var toFull = { type: type, to: full };
  var toHalf = { type: type, to: half };
  WidthConversionTable.zenkaku[half] = toFull;
  WidthConversionTable.hankaku[full] = toHalf;
}
174
175
extend(string, true, true, {

  /***
   * @method hankaku([mode] = 'all')
   * @returns String
   * @short Converts full-width characters (zenkaku) to half-width (hankaku).
   * @extra [mode] accepts any combination of "a" (alphabet), "n" (numbers), "k" (katakana), "s" (spaces), "p" (punctuation), or "all". Modes may be passed as separate arguments or joined into one string. In the examples the inputs are full-width and the converted parts of the results are half-width (half-width katakana are U+FF61-U+FF9F).
   * @example
   *
   *   'タロウ　ＹＡＭＡＤＡです！'.hankaku()                      -> 'ﾀﾛｳ YAMADAです!'
   *   'タロウ　ＹＡＭＡＤＡです！'.hankaku('a')                   -> 'タロウ　YAMADAです！'
   *   'タロウ　ＹＡＭＡＤＡです！'.hankaku('alphabet')            -> 'タロウ　YAMADAです！'
   *   'タロウです！　２５歳です！'.hankaku('katakana', 'numbers') -> 'ﾀﾛｳです！　25歳です！'
   *   'タロウです！　２５歳です！'.hankaku('k', 'n')              -> 'ﾀﾛｳです！　25歳です！'
   *   'タロウです！　２５歳です！'.hankaku('kn')                  -> 'ﾀﾛｳです！　25歳です！'
   *   'タロウです！　２５歳です！'.hankaku('sp')                  -> 'タロウです! ２５歳です!'
   *
   ***/
  hankaku: function() {
    var modes = arguments;
    return convertCharacterWidth(this, modes, allZenkaku, 'hankaku');
  },

  /***
   * @method zenkaku([mode] = 'all')
   * @returns String
   * @short Converts half-width characters (hankaku) to full-width (zenkaku).
   * @extra [mode] accepts any combination of "a" (alphabet), "n" (numbers), "k" (katakana), "s" (spaces), "p" (punctuation), or "all". Modes may be passed as separate arguments or joined into one string. In the examples the inputs are half-width and the converted parts of the results are full-width.
   * @example
   *
   *   'ﾀﾛｳ YAMADAです!'.zenkaku()                         -> 'タロウ　ＹＡＭＡＤＡです！'
   *   'ﾀﾛｳ YAMADAです!'.zenkaku('a')                      -> 'ﾀﾛｳ ＹＡＭＡＤＡです!'
   *   'ﾀﾛｳ YAMADAです!'.zenkaku('alphabet')               -> 'ﾀﾛｳ ＹＡＭＡＤＡです!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('katakana', 'numbers') -> 'タロウです! ２５歳です!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('k', 'n')              -> 'タロウです! ２５歳です!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('kn')                  -> 'タロウです! ２５歳です!'
   *   'ﾀﾛｳです! 25歳です!'.zenkaku('sp')                  -> 'ﾀﾛｳです！　25歳です！'
   *
   ***/
  zenkaku: function() {
    var modes = arguments;
    return convertCharacterWidth(this, modes, allHankaku, 'zenkaku');
  },

  /***
   * @method hiragana([all] = true)
   * @returns String
   * @short Converts katakana into hiragana.
   * @extra Half-width katakana are first widened to full-width so that they are converted too. If [all] is false that step is skipped and only full-width katakana will be converted.
   * @example
   *
   *   'カタカナ'.hiragana()   -> 'かたかな'
   *   'コンニチハ'.hiragana() -> 'こんにちは'
   *   'ｶﾀｶﾅ'.hiragana()       -> 'かたかな'
   *   'ｶﾀｶﾅ'.hiragana(false)  -> 'ｶﾀｶﾅ'
   *
   ***/
  hiragana: function(all) {
    var widened = all !== false ? this.zenkaku('k') : this;
    // Each katakana in this range sits 96 code points after its hiragana.
    return widened.replace(/[\u30A1-\u30F6]/g, function(kana) {
      return kana.shift(-96);
    });
  },

  /***
   * @method katakana()
   * @returns String
   * @short Converts hiragana into katakana.
   * @example
   *
   *   'かたかな'.katakana()   -> 'カタカナ'
   *   'こんにちは'.katakana() -> 'コンニチハ'
   *
   ***/
  katakana: function() {
    // Each hiragana in this range sits 96 code points before its katakana.
    function toKatakana(kana) {
      return kana.shift(96);
    }
    return this.replace(/[\u3041-\u3096]/g, toKatakana);
  }

});

// Install the is[Script]/has[Script] methods as soon as the module loads.
buildUnicodeScripts();
261