Skip to content

Commit 09913ec

Browse files
author
mikob
committed
Removed the thin abstraction layer "TokenizerBuilder", merging it into the loader. Made loader/builder terminology more consistent.
1 parent 71ea847 commit 09913ec

File tree

14 files changed

+63
-171
lines changed

14 files changed

+63
-171
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Load this library as follows:
4747

4848
You can prepare a tokenizer like this:
4949

50-
kuromoji.builder({ dicPath: "path/to/dictionary/dir/" }).build(function (err, tokenizer) {
50+
kuromoji.loader({ dic_path: "path/to/dictionary/dir/" }).load(function (err, tokenizer) {
5151
// tokenizer is ready
5252
var path = tokenizer.tokenize("すもももももももものうち");
5353
console.log(path);
@@ -71,7 +71,7 @@ In your HTML:
7171

7272
In your JavaScript:
7373

74-
kuromoji.builder({ dicPath: "/url/to/dictionary/dir/" }).build(function (err, tokenizer) {
74+
kuromoji.loader({ dic_path: "/url/to/dictionary/dir/" }).load(function (err, tokenizer) {
7575
// tokenizer is ready
7676
var path = tokenizer.tokenize("すもももももももものうち");
7777
console.log(path);

build/kuromoji.js

Lines changed: 33 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -6963,64 +6963,7 @@ Tokenizer.prototype.getLattice = function (text) {
69636963

69646964
module.exports = Tokenizer;
69656965

6966-
},{"./util/IpadicFormatter":22,"./viterbi/ViterbiBuilder":24,"./viterbi/ViterbiSearcher":27}],7:[function(require,module,exports){
6967-
/*
6968-
* Copyright 2014 Takuya Asano
6969-
* Copyright 2010-2014 Atilika Inc. and contributors
6970-
*
6971-
* Licensed under the Apache License, Version 2.0 (the "License");
6972-
* you may not use this file except in compliance with the License.
6973-
* You may obtain a copy of the License at
6974-
*
6975-
* http://www.apache.org/licenses/LICENSE-2.0
6976-
*
6977-
* Unless required by applicable law or agreed to in writing, software
6978-
* distributed under the License is distributed on an "AS IS" BASIS,
6979-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
6980-
* See the License for the specific language governing permissions and
6981-
* limitations under the License.
6982-
*/
6983-
6984-
"use strict";
6985-
6986-
var Tokenizer = require("./Tokenizer");
6987-
var DictionaryLoader = require("./loader/NodeDictionaryLoader");
6988-
6989-
/**
6990-
* TokenizerBuilder create Tokenizer instance.
6991-
* @param {Object} option JSON object which have key-value pairs settings
6992-
* @param {string} option.dicPath Dictionary directory path (or URL using in browser)
6993-
* @constructor
6994-
*/
6995-
function TokenizerBuilder(option) {
6996-
if (option.dicPath == null) {
6997-
this.dic_path = "dict/";
6998-
} else {
6999-
this.dic_path = option.dicPath;
7000-
}
7001-
}
7002-
7003-
/**
7004-
* Build Tokenizer instance by asynchronous manner
7005-
* @param {TokenizerBuilder~onLoad} callback Callback function
7006-
*/
7007-
TokenizerBuilder.prototype.build = function (callback) {
7008-
var loader = new DictionaryLoader(this.dic_path);
7009-
loader.load(function (err, dic) {
7010-
callback(err, new Tokenizer(dic));
7011-
});
7012-
};
7013-
7014-
/**
7015-
* Callback used by build
7016-
* @callback TokenizerBuilder~onLoad
7017-
* @param {Object} err Error object
7018-
* @param {Tokenizer} tokenizer Prepared Tokenizer
7019-
*/
7020-
7021-
module.exports = TokenizerBuilder;
7022-
7023-
},{"./Tokenizer":6,"./loader/NodeDictionaryLoader":19}],8:[function(require,module,exports){
6966+
},{"./util/IpadicFormatter":21,"./viterbi/ViterbiBuilder":23,"./viterbi/ViterbiSearcher":26}],7:[function(require,module,exports){
70246967
/*
70256968
* Copyright 2014 Takuya Asano
70266969
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7059,7 +7002,7 @@ function CharacterClass(class_id, class_name, is_always_invoke, is_grouping, max
70597002

70607003
module.exports = CharacterClass;
70617004

7062-
},{}],9:[function(require,module,exports){
7005+
},{}],8:[function(require,module,exports){
70637006
/*
70647007
* Copyright 2014 Takuya Asano
70657008
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7266,7 +7209,7 @@ CharacterDefinition.prototype.lookup = function (ch) {
72667209

72677210
module.exports = CharacterDefinition;
72687211

7269-
},{"../util/SurrogateAwareString":23,"./CharacterClass":8,"./InvokeDefinitionMap":12}],10:[function(require,module,exports){
7212+
},{"../util/SurrogateAwareString":22,"./CharacterClass":7,"./InvokeDefinitionMap":11}],9:[function(require,module,exports){
72707213
/*
72717214
* Copyright 2014 Takuya Asano
72727215
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7327,7 +7270,7 @@ ConnectionCosts.prototype.loadConnectionCosts = function (connection_costs_buffe
73277270

73287271
module.exports = ConnectionCosts;
73297272

7330-
},{}],11:[function(require,module,exports){
7273+
},{}],10:[function(require,module,exports){
73317274
/*
73327275
* Copyright 2014 Takuya Asano
73337276
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7411,7 +7354,7 @@ DynamicDictionaries.prototype.loadUnknownDictionaries = function (unk_buffer, un
74117354

74127355
module.exports = DynamicDictionaries;
74137356

7414-
},{"./ConnectionCosts":10,"./TokenInfoDictionary":13,"./UnknownDictionary":14,"doublearray":2}],12:[function(require,module,exports){
7357+
},{"./ConnectionCosts":9,"./TokenInfoDictionary":12,"./UnknownDictionary":13,"doublearray":2}],11:[function(require,module,exports){
74157358
/*
74167359
* Copyright 2014 Takuya Asano
74177360
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7523,7 +7466,7 @@ InvokeDefinitionMap.prototype.toBuffer = function () {
75237466

75247467
module.exports = InvokeDefinitionMap;
75257468

7526-
},{"../util/ByteBuffer":21,"./CharacterClass":8}],13:[function(require,module,exports){
7469+
},{"../util/ByteBuffer":20,"./CharacterClass":7}],12:[function(require,module,exports){
75277470
/*
75287471
* Copyright 2014 Takuya Asano
75297472
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7677,7 +7620,7 @@ TokenInfoDictionary.prototype.getFeatures = function (token_info_id_str) {
76777620

76787621
module.exports = TokenInfoDictionary;
76797622

7680-
},{"../util/ByteBuffer":21}],14:[function(require,module,exports){
7623+
},{"../util/ByteBuffer":20}],13:[function(require,module,exports){
76817624
/*
76827625
* Copyright 2014 Takuya Asano
76837626
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7737,7 +7680,7 @@ UnknownDictionary.prototype.loadUnknownDictionaries = function (unk_buffer, unk_
77377680

77387681
module.exports = UnknownDictionary;
77397682

7740-
},{"../util/ByteBuffer":21,"./CharacterDefinition":9,"./TokenInfoDictionary":13}],15:[function(require,module,exports){
7683+
},{"../util/ByteBuffer":20,"./CharacterDefinition":8,"./TokenInfoDictionary":12}],14:[function(require,module,exports){
77417684
/*
77427685
* Copyright 2014 Takuya Asano
77437686
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7807,7 +7750,7 @@ CharacterDefinitionBuilder.prototype.build = function () {
78077750

78087751
module.exports = CharacterDefinitionBuilder;
78097752

7810-
},{"../CharacterDefinition":9,"../InvokeDefinitionMap":12}],16:[function(require,module,exports){
7753+
},{"../CharacterDefinition":8,"../InvokeDefinitionMap":11}],15:[function(require,module,exports){
78117754
/*
78127755
* Copyright 2014 Takuya Asano
78137756
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -7879,7 +7822,7 @@ ConnectionCostsBuilder.prototype.build = function () {
78797822

78807823
module.exports = ConnectionCostsBuilder;
78817824

7882-
},{"../ConnectionCosts":10}],17:[function(require,module,exports){
7825+
},{"../ConnectionCosts":9}],16:[function(require,module,exports){
78837826
/*
78847827
* Copyright 2014 Takuya Asano
78857828
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8039,7 +7982,7 @@ DictionaryBuilder.prototype.buildDoubleArray = function () {
80397982

80407983
module.exports = DictionaryBuilder;
80417984

8042-
},{"../DynamicDictionaries":11,"../TokenInfoDictionary":13,"../UnknownDictionary":14,"./CharacterDefinitionBuilder":15,"./ConnectionCostsBuilder":16,"doublearray":2}],18:[function(require,module,exports){
7985+
},{"../DynamicDictionaries":10,"../TokenInfoDictionary":12,"../UnknownDictionary":13,"./CharacterDefinitionBuilder":14,"./ConnectionCostsBuilder":15,"doublearray":2}],17:[function(require,module,exports){
80437986
/*
80447987
* Copyright 2014 Takuya Asano
80457988
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8059,22 +8002,22 @@ module.exports = DictionaryBuilder;
80598002

80608003
"use strict";
80618004

8062-
var TokenizerBuilder = require("./TokenizerBuilder");
80638005
var DictionaryBuilder = require("./dict/builder/DictionaryBuilder");
8006+
var DictionaryLoader = require("./loader/NodeDictionaryLoader");
80648007

80658008
// Public methods
80668009
var kuromoji = {
8067-
builder: function (option) {
8068-
return new TokenizerBuilder(option);
8010+
loader: function (options) {
8011+
return new DictionaryLoader(options);
80698012
},
8070-
dictionaryBuilder: function () {
8013+
builder: function () {
80718014
return new DictionaryBuilder();
80728015
}
80738016
};
80748017

80758018
module.exports = kuromoji;
80768019

8077-
},{"./TokenizerBuilder":7,"./dict/builder/DictionaryBuilder":17}],19:[function(require,module,exports){
8020+
},{"./dict/builder/DictionaryBuilder":16,"./loader/NodeDictionaryLoader":18}],18:[function(require,module,exports){
80788021
/*
80798022
* Copyright 2014 Takuya Asano
80808023
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8099,11 +8042,11 @@ var DictionaryLoader = require("./DictionaryLoader");
80998042

81008043
/**
81018044
* BrowserDictionaryLoader inherits DictionaryLoader, using XMLHttpRequest for download
8102-
* @param {string} dic_path Dictionary path
8045+
* @param {object} options Options for the dictionary
81038046
* @constructor
81048047
*/
8105-
function BrowserDictionaryLoader(dic_path) {
8106-
DictionaryLoader.apply(this, [dic_path]);
8048+
function BrowserDictionaryLoader(options) {
8049+
DictionaryLoader.call(this, options);
81078050
}
81088051

81098052
BrowserDictionaryLoader.prototype = Object.create(DictionaryLoader.prototype);
@@ -8114,6 +8057,7 @@ BrowserDictionaryLoader.prototype = Object.create(DictionaryLoader.prototype);
81148057
* @param {BrowserDictionaryLoader~onLoad} callback Callback function
81158058
*/
81168059
BrowserDictionaryLoader.prototype.loadArrayBuffer = function (url, callback) {
8060+
// TODO: check for a cached copy first (no cache lookup is implemented at this point)
81178061
var xhr = new XMLHttpRequest();
81188062
xhr.open("GET", url, true);
81198063
xhr.responseType = "arraybuffer";
@@ -8143,7 +8087,7 @@ BrowserDictionaryLoader.prototype.loadArrayBuffer = function (url, callback) {
81438087

81448088
module.exports = BrowserDictionaryLoader;
81458089

8146-
},{"./DictionaryLoader":20,"zlibjs/bin/gunzip.min.js":5}],20:[function(require,module,exports){
8090+
},{"./DictionaryLoader":19,"zlibjs/bin/gunzip.min.js":5}],19:[function(require,module,exports){
81478091
/*
81488092
* Copyright 2014 Takuya Asano
81498093
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8166,15 +8110,16 @@ module.exports = BrowserDictionaryLoader;
81668110
var path = require("path");
81678111
var async = require("async");
81688112
var DynamicDictionaries = require("../dict/DynamicDictionaries");
8113+
var Tokenizer = require("../Tokenizer");
81698114

81708115
/**
81718116
* DictionaryLoader base constructor
81728117
* @param {Object} options Loader options; options.dic_path is the dictionary path (falls back to 'dict/' when not set)
81738118
* @constructor
81748119
*/
8175-
function DictionaryLoader(dic_path) {
8120+
function DictionaryLoader(options) {
81768121
this.dic = new DynamicDictionaries();
8177-
this.dic_path = dic_path;
8122+
this.dic_path = options.dic_path || 'dict/';
81788123
}
81798124

81808125
DictionaryLoader.prototype.loadArrayBuffer = function (file, callback) {
@@ -8269,7 +8214,7 @@ DictionaryLoader.prototype.load = function (load_callback) {
82698214
});
82708215
}
82718216
], function (err) {
8272-
load_callback(err, dic);
8217+
load_callback(err, new Tokenizer(dic));
82738218
});
82748219
};
82758220

@@ -8282,7 +8227,7 @@ DictionaryLoader.prototype.load = function (load_callback) {
82828227

82838228
module.exports = DictionaryLoader;
82848229

8285-
},{"../dict/DynamicDictionaries":11,"async":1,"path":3}],21:[function(require,module,exports){
8230+
},{"../Tokenizer":6,"../dict/DynamicDictionaries":10,"async":1,"path":3}],20:[function(require,module,exports){
82868231
/*
82878232
* Copyright 2014 Takuya Asano
82888233
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8573,7 +8518,7 @@ ByteBuffer.prototype.getString = function (index) {
85738518

85748519
module.exports = ByteBuffer;
85758520

8576-
},{}],22:[function(require,module,exports){
8521+
},{}],21:[function(require,module,exports){
85778522
/*
85788523
* Copyright 2014 Takuya Asano
85798524
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8642,7 +8587,7 @@ IpadicFormatter.prototype.formatUnknownEntry = function (word_id, position, type
86428587

86438588
module.exports = IpadicFormatter;
86448589

8645-
},{}],23:[function(require,module,exports){
8590+
},{}],22:[function(require,module,exports){
86468591
/*
86478592
* Copyright 2014 Takuya Asano
86488593
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8735,7 +8680,7 @@ SurrogateAwareString.isSurrogatePair = function (ch) {
87358680

87368681
module.exports = SurrogateAwareString;
87378682

8738-
},{}],24:[function(require,module,exports){
8683+
},{}],23:[function(require,module,exports){
87398684
/*
87408685
* Copyright 2014 Takuya Asano
87418686
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8838,7 +8783,7 @@ ViterbiBuilder.prototype.build = function (sentence_str) {
88388783

88398784
module.exports = ViterbiBuilder;
88408785

8841-
},{"../util/SurrogateAwareString":23,"./ViterbiLattice":25,"./ViterbiNode":26}],25:[function(require,module,exports){
8786+
},{"../util/SurrogateAwareString":22,"./ViterbiLattice":24,"./ViterbiNode":25}],24:[function(require,module,exports){
88428787
/*
88438788
* Copyright 2014 Takuya Asano
88448789
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8900,7 +8845,7 @@ ViterbiLattice.prototype.appendEos = function () {
89008845

89018846
module.exports = ViterbiLattice;
89028847

8903-
},{"./ViterbiNode":26}],26:[function(require,module,exports){
8848+
},{"./ViterbiNode":25}],25:[function(require,module,exports){
89048849
/*
89058850
* Copyright 2014 Takuya Asano
89068851
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -8951,7 +8896,7 @@ function ViterbiNode(node_name, node_cost, start_pos, length, type, left_id, rig
89518896

89528897
module.exports = ViterbiNode;
89538898

8954-
},{}],27:[function(require,module,exports){
8899+
},{}],26:[function(require,module,exports){
89558900
/*
89568901
* Copyright 2014 Takuya Asano
89578902
* Copyright 2010-2014 Atilika Inc. and contributors
@@ -9055,5 +9000,5 @@ ViterbiSearcher.prototype.backward = function (lattice) {
90559000

90569001
module.exports = ViterbiSearcher;
90579002

9058-
},{}]},{},[18])(18)
9003+
},{}]},{},[17])(17)
90599004
});

demo/js/tokenize.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ vm.$watch("inputText", function (value) {
7171

7272

7373
// Load and prepare tokenizer
74-
kuromoji.builder({ dicPath: DIC_URL }).build(function (error, _tokenizer) {
74+
kuromoji.loader({ dic_path: DIC_URL }).load(function (error, _tokenizer) {
7575
if (error != null) {
7676
console.log(error);
7777
}
@@ -168,4 +168,4 @@ function drawLattice () {
168168
.rankDir("LR");
169169
renderer.layout(layout).run(g, d3.select("svg g"));
170170
}
171-
*/
171+
*/

example/load-node.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ var kuromoji = require("../src/kuromoji");
2121
var DIC_DIR = "dict/";
2222

2323
// Load dictionaries from file, and prepare tokenizer
24-
kuromoji.builder({ dicPath: DIC_DIR }).build(function (error, tokenizer) {
24+
kuromoji.loader({ dic_path: DIC_DIR }).load(function (error, tokenizer) {
2525
var path = tokenizer.tokenize("すもももももももものうち");
2626
console.log(path);
2727
module.exports = tokenizer;

gulpfile.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ gulp.task("create-dat-files", (done) => {
6666
}
6767

6868
const dic = new IPADic();
69-
const builder = kuromoji.dictionaryBuilder();
69+
const builder = kuromoji.builder();
7070

7171
// Build token info dictionary
7272
const tokenInfoPromise = dic.readTokenInfo((line) => {

0 commit comments

Comments
 (0)