Remove duplicate values from JS array Ask Question

Remove duplicate values from JS array Ask Question

I have a very simple JavaScript array that may or may not contain duplicates.

var names = ["Mike","Matt","Nancy","Adam","Jenny","Nancy","Carl"];

I need to remove the duplicates and put the unique values in a new array.

I could point to all the code that I've tried but I think it's useless because they don't work. I accept jQuery solutions too.

Similar question:

ベストアンサー1

TL;DR

Using the Set constructor and the spread syntax:

uniq = [...new Set(array)];

( Note that var uniq will be an array... new Set() turns it into a set, but [... ] turns it back into an array again )


"Smart" but naïve way

uniqueArray = a.filter(function(item, pos) {
    return a.indexOf(item) == pos;
})

Basically, we iterate over the array and, for each element, check if the first position of this element in the array is equal to the current position. Obviously, these two positions are different for duplicate elements.

Using the 3rd ("this array") parameter of the filter callback we can avoid a closure of the array variable:

uniqueArray = a.filter(function(item, pos, self) {
    return self.indexOf(item) == pos;
})

Although concise, this algorithm is not particularly efficient for large arrays (quadratic time).

Hashtables to the rescue

function uniq(a) {
    var seen = {};
    return a.filter(function(item) {
        return seen.hasOwnProperty(item) ? false : (seen[item] = true);
    });
}

This is how it's usually done. The idea is to place each element in a hashtable and then check for its presence instantly. This gives us linear time, but has at least two drawbacks:

  • since hash keys can only be strings or symbols in JavaScript, this code doesn't distinguish numbers and "numeric strings". That is, uniq([1,"1"]) will return just [1]
  • for the same reason, all objects will be considered equal: uniq([{foo:1},{foo:2}]) will return just [{foo:1}].

That said, if your arrays contain only primitives and you don't care about types (e.g. it's always numbers), this solution is optimal.

The best from two worlds

A universal solution combines both approaches: it uses hash lookups for primitives and linear search for objects.

function uniq(a) {
    var prims = {"boolean":{}, "number":{}, "string":{}}, objs = [];

    return a.filter(function(item) {
        var type = typeof item;
        if(type in prims)
            return prims[type].hasOwnProperty(item) ? false : (prims[type][item] = true);
        else
            return objs.indexOf(item) >= 0 ? false : objs.push(item);
    });
}

sort | uniq

Another option is to sort the array first, and then remove each element equal to the preceding one:

function uniq(a) {
    return a.sort().filter(function(item, pos, ary) {
        return !pos || item != ary[pos - 1];
    });
}

Again, this doesn't work with objects (because all objects are equal for sort). Additionally, we silently change the original array as a side effect - not good! However, if your input is already sorted, this is the way to go (just remove sort from the above).

Unique by...

Sometimes it's desired to uniquify a list based on some criteria other than just equality, for example, to filter out objects that are different, but share some property. This can be done elegantly by passing a callback. This "key" callback is applied to each element, and elements with equal "keys" are removed. Since key is expected to return a primitive, hash table will work fine here:

function uniqBy(a, key) {
    var seen = {};
    return a.filter(function(item) {
        var k = key(item);
        return seen.hasOwnProperty(k) ? false : (seen[k] = true);
    })
}

A particularly useful key() is JSON.stringify which will remove objects that are physically different, but "look" the same:

a = [[1,2,3], [4,5,6], [1,2,3]]
b = uniqBy(a, JSON.stringify)
console.log(b) // [[1,2,3], [4,5,6]]

If the key is not primitive, you have to resort to the linear search:

function uniqBy(a, key) {
    var index = [];
    return a.filter(function (item) {
        var k = key(item);
        return index.indexOf(k) >= 0 ? false : index.push(k);
    });
}

In ES6 you can use a Set:

function uniqBy(a, key) {
    let seen = new Set();
    return a.filter(item => {
        let k = key(item);
        return seen.has(k) ? false : seen.add(k);
    });
}

またはMap:

function uniqBy(a, key) {
    return [
        ...new Map(
            a.map(x => [key(x), x])
        ).values()
    ]
}

どちらも非プリミティブキーでも機能します。

最初ですか、最後ですか?

キーによってオブジェクトを削除する場合、「等しい」オブジェクトの最初のものまたは最後のものを保持したい場合があります。

最初のものを保持するには上記の変形を使用しSetMap最後のものを保持するには を使用します。

function uniqByKeepFirst(a, key) {
    let seen = new Set();
    return a.filter(item => {
        let k = key(item);
        return seen.has(k) ? false : seen.add(k);
    });
}


function uniqByKeepLast(a, key) {
    return [
        ...new Map(
            a.map(x => [key(x), x])
        ).values()
    ]
}

//

data = [
    {a:1, u:1},
    {a:2, u:2},
    {a:3, u:3},
    {a:4, u:1},
    {a:5, u:2},
    {a:6, u:3},
];

console.log(uniqByKeepFirst(data, it => it.u))
console.log(uniqByKeepLast(data, it => it.u))

図書館

両方アンダースコアそしてローダッシュメソッドを提供しますuniq。そのアルゴリズムは基本的に上記の最初のスニペットに似ており、要約すると次のようになります。

var result = [];
a.forEach(function(item) {
     if(result.indexOf(item) < 0) {
         result.push(item);
     }
});

indexOfこれは二次関数ですが、ネイティブのラップ、キーによる一意化機能(iteratee彼らの用語で)、すでにソートされた配列の最適化など、便利な追加機能があります。

jQuery を使用していて、お金をかけずに何もできないという場合は、次のようになります。

  $.uniqArray = function(a) {
        return $.grep(a, function(item, pos) {
            return $.inArray(item, a) === pos;
        });
  }

これも、最初のスニペットのバリエーションです。

パフォーマンス

JavaScript では関数呼び出しにコストがかかるため、上記のソリューションは簡潔ではあるものの、特に効率的ではありません。パフォーマンスを最大限に高めるには、filterループに置き換えて他の関数呼び出しを削除します。

function uniq_fast(a) {
    var seen = {};
    var out = [];
    var len = a.length;
    var j = 0;
    for(var i = 0; i < len; i++) {
         var item = a[i];
         if(seen[item] !== 1) {
               seen[item] = 1;
               out[j++] = item;
         }
    }
    return out;
}

この醜いコードの塊は、上記のスニペット #3 と同じことを行います が、桁違いに高速です (2017 年現在、速度は 2 倍だけです - JS コアの人々は素晴らしい仕事をしています!)。

function uniq(a) {
    var seen = {};
    return a.filter(function(item) {
        return seen.hasOwnProperty(item) ? false : (seen[item] = true);
    });
}

function uniq_fast(a) {
    var seen = {};
    var out = [];
    var len = a.length;
    var j = 0;
    for(var i = 0; i < len; i++) {
         var item = a[i];
         if(seen[item] !== 1) {
               seen[item] = 1;
               out[j++] = item;
         }
    }
    return out;
}

/////

var r = [0,1,2,3,4,5,6,7,8,9],
    a = [],
    LEN = 1000,
    LOOPS = 1000;

while(LEN--)
    a = a.concat(r);

var d = new Date();
for(var i = 0; i < LOOPS; i++)
    uniq(a);
document.write('<br>uniq, ms/loop: ' + (new Date() - d)/LOOPS)

var d = new Date();
for(var i = 0; i < LOOPS; i++)
    uniq_fast(a);
document.write('<br>uniq_fast, ms/loop: ' + (new Date() - d)/LOOPS)

ES6

ES6は、セットオブジェクトを使用すると、作業がずっと簡単になります。

function uniq(a) {
   return Array.from(new Set(a));
}

または

let uniq = a => [...new Set(a)];

Python とは異なり、ES6 セットは挿入順に反復されるため、このコードは元の配列の順序を保持することに注意してください。

ただし、一意の要素を持つ配列が必要な場合は、最初からセットを使用することをお勧めします。

発電機

の「遅延」ジェネレータ ベースのバージョンは、uniq同じ基礎に基づいて構築できます。

  • 引数から次の値を取得する
  • すでに見たことがある場合はスキップしてください
  • それ以外の場合は、それを譲り、すでに見た値のセットに追加します。

function* uniqIter(a) {
    let seen = new Set();

    for (let x of a) {
        if (!seen.has(x)) {
            seen.add(x);
            yield x;
        }
    }
}

// example:

function* randomsBelow(limit) {
    while (1)
        yield Math.floor(Math.random() * limit);
}

// note that randomsBelow is endless

count = 20;
limit = 30;

for (let r of uniqIter(randomsBelow(limit))) {
    console.log(r);
    if (--count === 0)
        break
}

// exercise for the reader: what happens if we set `limit` less than `count` and why

おすすめ記事