new Jaccard([options]) → {Jaccard}
Promise-based Jaccard similarity coefficient index calculation framework
Parameters:
Name | Type | Attributes | Description |
---|---|---|---|
options | Object | <optional> | |
- Source:
Returns:
- Type
- Jaccard
Example
#!/usr/bin/env node
const Jaccard = require("jaccard-index");

// Sample data: each key is an item ID, each value is the list of user IDs
// recorded for that item.
const logs = {
  "item1": ["user1", "user2"],
  "item2": ["user2", "user3", "user4"],
  "item3": ["user1", "user2", "user5"]
};

/**
 * Log loader passed to Jaccard through the `getLog` option.
 * @param {string} itemId - key into `logs`
 * @returns {Promise.<Array>} Promise resolving to the log array of the item
 */
function getLog(itemId) {
  return Promise.resolve(logs[itemId]); // async
  // return logs[itemId]; // sync
}

/**
 * Prints the calculated links as pretty-printed JSON, then ends the process.
 * @param {Array} links - value the getLinks() Promise resolved with
 */
function showResult(links) {
  console.log(JSON.stringify(links, null, 2));
  process.exit(0);
}

const items = Object.keys(logs); // item1, item2, item3
const options = { getLog };

// Calculate the index for the item pairs; failures are reported as warnings.
Jaccard(options)
  .getLinks(items)
  .then(showResult)
  .catch(console.warn);

// Result:
// [
// {"source": "item1", "target": "item2", "value": 0.25},
// {"source": "item1", "target": "item3", "value": 0.6666666666666666},
// {"source": "item2", "target": "item3", "value": 0.2}
// ]
Members
direction :boolean
Set false when source and target are swappable.
Set true when they have a direction.
Type:
- boolean
- Default Value:
- false
- Source:
expire :number|null
Time in milliseconds after which cached results expire.
Set null to disable the cache feature.
Type:
- number | null
- Default Value:
- null
- Source:
throttle :number|null
Concurrency for running data loading and index calculation.
Only a single task of each is allowed by default.
The other tasks wait to start until the first task has completed.
Set null to disable the throttle.
Type:
- number | null
- Default Value:
- 1
- Source:
timeout :number|null
Timeout in milliseconds to wait for a result.
Set null to disable the timeout.
Type:
- number | null
- Default Value:
- null
- Source:
wait :number|null
Time in milliseconds to wait between calculation iterations, to avoid blocking the Node.js process.
Set null to disable any additional wait.
Type:
- number | null
- Default Value:
- 0
- Source:
Methods
cachedIndex(sourceItem, targetItem) → {Promise.<(number|undefined)>}
Returns a Promise for the Jaccard index between the pair of items, using the built-in cache mechanism.
This calls the getIndex() method when the cache is not available.
Parameters:
Name | Type | Description |
---|---|---|
sourceItem | string | |
targetItem | string | |
- Source:
Returns:
- Type
- Promise.<(number|undefined)>
cachedLog(itemId) → {Promise.<Array>}
Retrieves a log array, using the built-in cache mechanism.
This calls the getLog() method when the cache is not available.
Parameters:
Name | Type | Description |
---|---|---|
itemId | string | |
- Source:
Returns:
- Type
- Promise.<Array>
filter(index) → {number|null|any}
Returns the Jaccard index value to be placed in the result.
This does nothing by default.
Override this function to apply a precision or another format.
Return null to ignore the index.
Parameters:
Name | Type | Description |
---|---|---|
index | number | Jaccard index |
- Source:
Returns:
- Type
- number | null | any
Examples
// Round the index to three decimal places before it is placed in the result.
jaccard.filter = function(index) {
  // Math.round is the standard rounding function; Math.filter does not exist
  // and calling it would throw a TypeError.
  return Math.round(index * 1000) / 1000;
};
// Keep only indexes above 0.001; returning null ignores the index.
jaccard.filter = function(index) {
  if (index > 0.001) {
    return index;
  }
  return null;
};
getIndex(sourceItem, targetItem) → {Promise.<(number|undefined)>}
Returns a Promise for the Jaccard index between the pair of items.
Parameters:
Name | Type | Description |
---|---|---|
sourceItem | string | |
targetItem | string | |
- Source:
Returns:
- Type
- Promise.<(number|undefined)>
getItems() → {Array.<string>}
Returns an array of all items.
Override this only when needed.
- Source:
Returns:
- Type
- Array.<string>
getLinks([sourceItems], [targetItems], [onLink]) → {Promise.<Array>}
Returns an Array of the Jaccard index of each link.
Parameters:
Name | Type | Attributes | Description |
---|---|---|---|
sourceItems | Array.<string> \| Promise.<Array> | <optional> | array of source items |
targetItems | Array.<string> \| Promise.<Array> | <optional> | array of target items |
onLink | function | <optional> | function(index, sourceItem, targetItem) {...} |
- Source:
Returns:
- Type
- Promise.<Array>
getLog(itemId) → {Array.<string>|Promise.<Array>}
Retrieves a log array.
This method must be overridden before calling the getLinks() or getIndex() methods.
Parameters:
Name | Type | Description |
---|---|---|
itemId | string | |
- Source:
Returns:
- Type
- Array.<string> | Promise.<Array>
Example
const fs = require("fs");
const Jaccard = require("jaccard-index");

const jaccard = Jaccard();
jaccard.getLog = getLog;

/**
 * Loads the log of an item from the text file "test/example/<itemId>.txt".
 * @param {string} itemId - item whose log file is read
 * @returns {Promise.<Array>} Promise resolving to the non-empty lines of the
 *   file; rejects with the read error when the file cannot be read
 */
function getLog(itemId) {
  return new Promise(function(resolve, reject) {
    const path = "test/example/" + itemId + ".txt";
    fs.readFile(path, "utf-8", function(err, content) {
      if (err) {
        reject(err);
        return;
      }
      // Split on newlines and drop empty entries (e.g. from a trailing newline).
      const lines = content.split("\n").filter(Boolean);
      resolve(lines);
    });
  });
}
index(sourceLog, targetLog) → {number|undefined|Promise.<(number|undefined)>}
Calculates the Jaccard index between a pair of Arrays.
Override this when you need an index method other than the Jaccard index.
Parameters:
Name | Type | Description |
---|---|---|
sourceLog | Array.<string> | |
targetLog | Array.<string> | |
- Source:
Returns:
- Type
- number | undefined | Promise.<(number|undefined)>
Example
const Jaccard = require("jaccard-index");
const jaccard = Jaccard();

const sourceLog = ["user1", "user2"];
const targetLog = ["user2", "user3", "user4"];

// The two logs share one user ("user2") out of four distinct users in total.
const similarity = jaccard.index(sourceLog, targetLog);
console.log(similarity); // => 0.25