Use MongoDB with a unique index on a hash of the first 2 MB of each file (combined with the file size) to detect duplicate files.
'use strict';

const fs = require('fs');
const path = require('path');
const util = require('util');
const readChunk = require('read-chunk');
const crypto = require('crypto');
const mongo = require('mongodb');

const readdir = util.promisify(fs.readdir);
const stat = util.promisify(fs.stat);

const url = 'mongodb://localhost:27017';
const client = new mongo.MongoClient(url, { useNewUrlParser: true });

// Number of leading bytes of every file that go into its fingerprint.
const HEAD_BYTES = 2 * 1024 * 1024;

// MongoDB server error code for a unique-index violation (E11000).
const DUPLICATE_KEY_CODE = 11000;

/**
 * Builds the fingerprint used as the unique `hex` key of a file.
 *
 * The fingerprint is the MD5 of the file's leading bytes followed by the MD5
 * of its size, so files that share a head but differ in length do not collide.
 * MD5 is used only as a fast content fingerprint here, not for security.
 *
 * @param {Buffer} head - Leading bytes of the file (may be empty).
 * @param {number} size - Total file size in bytes.
 * @returns {string} 64 hexadecimal characters.
 */
function fingerprint(head, size) {
  const headHash = crypto.createHash('md5').update(head).digest('hex');
  const sizeHash = crypto.createHash('md5').update(String(size)).digest('hex');
  return headHash + sizeHash;
}

/**
 * Inserts one file into the collection, or reports it as a duplicate when a
 * document with the same fingerprint already exists.
 *
 * @param {string} file - Path of the file.
 * @param {number} size - File size in bytes.
 * @param {object} collection - MongoDB collection with a unique index on `hex`.
 * @returns {Promise<void>}
 * @throws Any read or database error other than a duplicate-key violation.
 */
async function recordFile(file, size, collection) {
  const head = readChunk.sync(file, 0, HEAD_BYTES);
  const hex = fingerprint(head, size);
  try {
    await collection.insertOne({ name: file, hex });
  } catch (err) {
    // Only a unique-index violation means "duplicate"; anything else
    // (lost connection, write error, ...) must not be misreported as one.
    if (err.code !== DUPLICATE_KEY_CODE) {
      throw err;
    }
    const first = await collection.findOne({ hex });
    console.log('\ndublicate: ', file);
    console.log(' : ', first ? first.name : '(unknown)');
  }
}

/**
 * Recursively walks `dir`, recording every regular file in `collection` and
 * logging files whose fingerprint is already present.
 *
 * A failure on a single entry (unreadable file, broken symlink, unreadable
 * sub-directory) is logged and the walk continues with the next entry.
 *
 * @param {string} dir - Directory to scan.
 * @param {object} collection - MongoDB collection with a unique index on `hex`.
 * @returns {Promise<void>}
 * @throws If `dir` itself cannot be listed.
 */
async function walk(dir, collection) {
  const list = await readdir(dir);
  for (const item of list) {
    const file = path.join(dir, item);
    console.log('file: ', file);
    try {
      // stat() is inside the try so that one bad entry does not abort the
      // whole directory.
      const stats = await stat(file);
      if (stats.isFile()) {
        await recordFile(file, stats.size, collection);
      } else if (stats.isDirectory()) {
        await walk(file, collection);
      }
    } catch (err) {
      console.log(err);
    }
  }
}

/**
 * Entry point: resets the `files.files` collection, ensures the unique index
 * on `hex`, scans `/media/photo`, and always closes the client afterwards.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await client.connect();
  try {
    const collection = client.db('files').collection('files');
    // Start from an empty collection so results reflect only this run.
    await collection.deleteMany({});
    await collection.createIndex({ hex: 1 }, { unique: true });
    await walk('/media/photo', collection);
  } finally {
    await client.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});