Class: Jaccard

Jaccard

new Jaccard(optionsopt) → {Jaccard}

Promise-based Jaccard similarity coefficient index calculation framework
Parameters:
Name Type Attributes Description
options Object <optional>
Source:
Returns:
Type
Jaccard
Example
#!/usr/bin/env node

var Jaccard = require("jaccard-index");

var logs = {
  "item1": ["user1", "user2"],
  "item2": ["user2", "user3", "user4"],
  "item3": ["user1", "user2", "user5"]
};

var items = Object.keys(logs); // item1, item2, item3

var options = {
  getLog: getLog
};

Jaccard(options).getLinks(items).then(showResult).catch(console.warn);

function getLog(itemId) {
  return Promise.resolve(logs[itemId]); // async
  // return logs[itemId]; // sync
}

function showResult(links) {
  console.log(JSON.stringify(links, null, 2));
  process.exit(0);
}

// Result:
// [
//   {"source": "item1", "target": "item2", "value": 0.25},
//   {"source": "item1", "target": "item3", "value": 0.6666666666666666},
//   {"source": "item2", "target": "item3", "value": 0.2}
// ]

Members

direction :boolean

Set false when source and target are swappable. Set true when they have a direction.
Type:
  • boolean
Default Value:
  • false
Source:

expire :number|null

Time in milliseconds after which cached results expire. Set null to disable the cache feature.
Type:
  • number | null
Default Value:
  • null
Source:

throttle :number|null

Concurrency limit for data loading and index calculation. By default, only a single task of each is allowed at a time; other tasks wait to start until the running task has completed. Set null to disable the throttle.
Type:
  • number | null
Default Value:
  • 1
Source:

timeout :number|null

Timeout in milliseconds to wait for a result. Set null to disable the timeout.
Type:
  • number | null
Default Value:
  • null
Source:

wait :number|null

Time in milliseconds to wait between calculation iterations, to avoid blocking the Node process. Set null to disable any additional wait.
Type:
  • number | null
Default Value:
  • 0
Source:

Methods

cachedIndex(sourceItem, targetItem) → {Promise.<(number|undefined)>}

returns a Promise for the Jaccard index between the pair of items, using the built-in cache mechanism. This calls the getIndex() method when no cached value is available.
Parameters:
Name Type Description
sourceItem string
targetItem string
Source:
Returns:
Type
Promise.<(number|undefined)>

cachedLog(itemId) → {Promise.<Array>}

retrieves a log array using the built-in cache mechanism. This calls the getLog() method when no cached value is available.
Parameters:
Name Type Description
itemId string
Source:
Returns:
Type
Promise.<Array>

filter(index) → {number|null|any}

returns a Jaccard index value to be placed in the result. This does nothing by default. Override this function to apply a precision or another format. Return null to ignore the index.
Parameters:
Name Type Description
index number Jaccard index
Source:
Returns:
Type
number | null | any
Examples
jaccard.filter = function(index) {
  return Math.round(index * 1000) / 1000;
};
jaccard.filter = function(index) {
  return (index > 0.001) ? index : null;
};

getIndex(sourceItem, targetItem) → {Promise.<(number|undefined)>}

returns a Promise for Jaccard index between the pair of items.
Parameters:
Name Type Description
sourceItem string
targetItem string
Source:
Returns:
Type
Promise.<(number|undefined)>

getItems() → {Array.<string>}

returns array of all items. Override this only when needed.
Source:
Returns:
Type
Array.<string>

getLinks(sourceItemsopt, targetItemsopt, onLinkopt) → {Promise.<Array>}

returns a Promise for an Array of Jaccard indexes, one for each link.
Parameters:
Name Type Attributes Description
sourceItems Array.<string> | Promise.<Array> <optional>
array of source items
targetItems Array.<string> | Promise.<Array> <optional>
array of target items
onLink function <optional>
function(index, sourceItem, targetItem) {...}
Source:
Returns:
Type
Promise.<Array>

getLog(itemId) → {Array.<string>|Promise.<Array>}

retrieves a log array. Overriding this method is required before calling getLinks() or getIndex() methods.
Parameters:
Name Type Description
itemId string
Source:
Returns:
Type
Array.<string> | Promise.<Array>
Example
var fs = require("fs");
var Jaccard = require("jaccard-index");

var jaccard = Jaccard();

jaccard.getLog = getLog;

function getLog(itemId) {
  return new Promise(function(resolve, reject) {
    var file = "test/example/" + itemId + ".txt";
    fs.readFile(file, "utf-8", function(err, text) {
      if (err) return reject(err);
      var data = text.split("\n").filter(function(v) {
        return !!v;
      });
      return resolve(data);
    });
  });
}

index(sourceLog, targetLog) → {number|undefined|Promise.<(number|undefined)>}

calculates a Jaccard index between a pair of Arrays. Override this when you need an index method other than the Jaccard index.
Parameters:
Name Type Description
sourceLog Array.<string>
targetLog Array.<string>
Source:
Returns:
Type
number | undefined | Promise.<(number|undefined)>
Example
var Jaccard = require("jaccard-index");
var jaccard = Jaccard();

var item1 = ["user1", "user2"];
var item2 = ["user2", "user3", "user4"];
var index = jaccard.index(item1, item2);

console.log(index); // => 0.25