summaryrefslogtreecommitdiff
path: root/node_modules/streamsearch
diff options
context:
space:
mode:
authorsowgro <tpoke.ferrari@gmail.com>2023-09-02 19:12:47 -0400
committersowgro <tpoke.ferrari@gmail.com>2023-09-02 19:12:47 -0400
commite4450c8417624b71d779cb4f41692538f9165e10 (patch)
treeb70826542223ecdf8a7a259f61b0a1abb8a217d8 /node_modules/streamsearch
downloadsowbot3-e4450c8417624b71d779cb4f41692538f9165e10.tar.gz
sowbot3-e4450c8417624b71d779cb4f41692538f9165e10.tar.bz2
sowbot3-e4450c8417624b71d779cb4f41692538f9165e10.zip
first commit
Diffstat (limited to 'node_modules/streamsearch')
-rw-r--r--node_modules/streamsearch/.eslintrc.js5
-rw-r--r--node_modules/streamsearch/.github/workflows/ci.yml24
-rw-r--r--node_modules/streamsearch/.github/workflows/lint.yml23
-rw-r--r--node_modules/streamsearch/LICENSE19
-rw-r--r--node_modules/streamsearch/README.md95
-rw-r--r--node_modules/streamsearch/lib/sbmh.js267
-rw-r--r--node_modules/streamsearch/package.json34
-rw-r--r--node_modules/streamsearch/test/test.js70
8 files changed, 537 insertions, 0 deletions
diff --git a/node_modules/streamsearch/.eslintrc.js b/node_modules/streamsearch/.eslintrc.js
new file mode 100644
index 0000000..be9311d
--- /dev/null
+++ b/node_modules/streamsearch/.eslintrc.js
@@ -0,0 +1,5 @@
+'use strict';
+
+module.exports = {
+ extends: '@mscdex/eslint-config',
+};
diff --git a/node_modules/streamsearch/.github/workflows/ci.yml b/node_modules/streamsearch/.github/workflows/ci.yml
new file mode 100644
index 0000000..29d5178
--- /dev/null
+++ b/node_modules/streamsearch/.github/workflows/ci.yml
@@ -0,0 +1,24 @@
+name: CI
+
+on:
+ pull_request:
+ push:
+ branches: [ master ]
+
+jobs:
+ tests-linux:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ node-version: [10.x, 12.x, 14.x, 16.x]
+ steps:
+ - uses: actions/checkout@v2
+ - name: Use Node.js ${{ matrix.node-version }}
+ uses: actions/setup-node@v1
+ with:
+ node-version: ${{ matrix.node-version }}
+ - name: Install module
+ run: npm install
+ - name: Run tests
+ run: npm test
diff --git a/node_modules/streamsearch/.github/workflows/lint.yml b/node_modules/streamsearch/.github/workflows/lint.yml
new file mode 100644
index 0000000..9f9e1f5
--- /dev/null
+++ b/node_modules/streamsearch/.github/workflows/lint.yml
@@ -0,0 +1,23 @@
+name: lint
+
+on:
+ pull_request:
+ push:
+ branches: [ master ]
+
+env:
+ NODE_VERSION: 16.x
+
+jobs:
+ lint-js:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - name: Use Node.js ${{ env.NODE_VERSION }}
+ uses: actions/setup-node@v1
+ with:
+ node-version: ${{ env.NODE_VERSION }}
+ - name: Install ESLint + ESLint configs/plugins
+ run: npm install --only=dev
+ - name: Lint files
+ run: npm run lint
diff --git a/node_modules/streamsearch/LICENSE b/node_modules/streamsearch/LICENSE
new file mode 100644
index 0000000..9ea90e0
--- /dev/null
+++ b/node_modules/streamsearch/LICENSE
@@ -0,0 +1,19 @@
+Copyright Brian White. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE. \ No newline at end of file
diff --git a/node_modules/streamsearch/README.md b/node_modules/streamsearch/README.md
new file mode 100644
index 0000000..c3934d1
--- /dev/null
+++ b/node_modules/streamsearch/README.md
@@ -0,0 +1,95 @@
+Description
+===========
+
+streamsearch is a module for [node.js](http://nodejs.org/) that allows searching a stream using the Boyer-Moore-Horspool algorithm.
+
+This module is based heavily on the Streaming Boyer-Moore-Horspool C++ implementation by Hongli Lai [here](https://github.com/FooBarWidget/boyer-moore-horspool).
+
+
+Requirements
+============
+
+* [node.js](http://nodejs.org/) -- v10.0.0 or newer
+
+
+Installation
+============
+
+ npm install streamsearch
+
+Example
+=======
+
+```js
+ const { inspect } = require('util');
+
+ const StreamSearch = require('streamsearch');
+
+ const needle = Buffer.from('\r\n');
+ const ss = new StreamSearch(needle, (isMatch, data, start, end) => {
+ if (data)
+ console.log('data: ' + inspect(data.toString('latin1', start, end)));
+ if (isMatch)
+ console.log('match!');
+ });
+
+ const chunks = [
+ 'foo',
+ ' bar',
+ '\r',
+ '\n',
+ 'baz, hello\r',
+ '\n world.',
+ '\r\n Node.JS rules!!\r\n\r\n',
+ ];
+ for (const chunk of chunks)
+ ss.push(Buffer.from(chunk));
+
+ // output:
+ //
+ // data: 'foo'
+ // data: ' bar'
+ // match!
+ // data: 'baz, hello'
+ // match!
+ // data: ' world.'
+ // match!
+ // data: ' Node.JS rules!!'
+ // match!
+ // data: ''
+ // match!
+```
+
+
+API
+===
+
+Properties
+----------
+
+* **maxMatches** - < _integer_ > - The maximum number of matches. Defaults to `Infinity`.
+
+* **matches** - < _integer_ > - The current match count.
+
+
+Functions
+---------
+
+* **(constructor)**(< _mixed_ >needle, < _function_ >callback) - Creates and returns a new instance for searching for a _Buffer_ or _string_ `needle`. `callback` is called any time there is non-matching data and/or there is a needle match. `callback` will be called with the following arguments:
+
+ 1. `isMatch` - _boolean_ - Indicates whether a match has been found
+
+ 2. `data` - _mixed_ - If set, this contains data that did not match the needle.
+
+ 3. `start` - _integer_ - The index in `data` where the non-matching data begins (inclusive).
+
+ 4. `end` - _integer_ - The index in `data` where the non-matching data ends (exclusive).
+
+ 5. `isSafeData` - _boolean_ - Indicates if it is safe to store a reference to `data` (e.g. as-is or via `data.slice()`) or not, as in some cases `data` may point to a Buffer whose contents change over time.
+
+* **destroy**() - _(void)_ - Emits any last remaining unmatched data that may still be buffered and then resets internal state.
+
+* **push**(< _Buffer_ >chunk) - _integer_ - Processes `chunk`, searching for a match. The return value is the last processed index in `chunk` + 1.
+
+* **reset**() - _(void)_ - Resets internal state. Useful for when you wish to start searching a new/different stream for example.
+
diff --git a/node_modules/streamsearch/lib/sbmh.js b/node_modules/streamsearch/lib/sbmh.js
new file mode 100644
index 0000000..510cae2
--- /dev/null
+++ b/node_modules/streamsearch/lib/sbmh.js
@@ -0,0 +1,267 @@
+'use strict';
+/*
+ Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
+ by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
+*/
+function memcmp(buf1, pos1, buf2, pos2, num) {
+ for (let i = 0; i < num; ++i) {
+ if (buf1[pos1 + i] !== buf2[pos2 + i])
+ return false;
+ }
+ return true;
+}
+
+class SBMH {
+ constructor(needle, cb) {
+ if (typeof cb !== 'function')
+ throw new Error('Missing match callback');
+
+ if (typeof needle === 'string')
+ needle = Buffer.from(needle);
+ else if (!Buffer.isBuffer(needle))
+ throw new Error(`Expected Buffer for needle, got ${typeof needle}`);
+
+ const needleLen = needle.length;
+
+ this.maxMatches = Infinity;
+ this.matches = 0;
+
+ this._cb = cb;
+ this._lookbehindSize = 0;
+ this._needle = needle;
+ this._bufPos = 0;
+
+ this._lookbehind = Buffer.allocUnsafe(needleLen);
+
+ // Initialize occurrence table.
+ this._occ = [
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen, needleLen, needleLen,
+ needleLen, needleLen, needleLen, needleLen
+ ];
+
+ // Populate occurrence table with analysis of the needle, ignoring the last
+ // letter.
+ if (needleLen > 1) {
+ for (let i = 0; i < needleLen - 1; ++i)
+ this._occ[needle[i]] = needleLen - 1 - i;
+ }
+ }
+
+ reset() {
+ this.matches = 0;
+ this._lookbehindSize = 0;
+ this._bufPos = 0;
+ }
+
+ push(chunk, pos) {
+ let result;
+ if (!Buffer.isBuffer(chunk))
+ chunk = Buffer.from(chunk, 'latin1');
+ const chunkLen = chunk.length;
+ this._bufPos = pos || 0;
+ while (result !== chunkLen && this.matches < this.maxMatches)
+ result = feed(this, chunk);
+ return result;
+ }
+
+ destroy() {
+ const lbSize = this._lookbehindSize;
+ if (lbSize)
+ this._cb(false, this._lookbehind, 0, lbSize, false);
+ this.reset();
+ }
+}
+
+function feed(self, data) {
+ const len = data.length;
+ const needle = self._needle;
+ const needleLen = needle.length;
+
+ // Positive: points to a position in `data`
+ // pos == 3 points to data[3]
+ // Negative: points to a position in the lookbehind buffer
+ // pos == -2 points to lookbehind[lookbehindSize - 2]
+ let pos = -self._lookbehindSize;
+ const lastNeedleCharPos = needleLen - 1;
+ const lastNeedleChar = needle[lastNeedleCharPos];
+ const end = len - needleLen;
+ const occ = self._occ;
+ const lookbehind = self._lookbehind;
+
+ if (pos < 0) {
+ // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
+ // search with character lookup code that considers both the
+ // lookbehind buffer and the current round's haystack data.
+ //
+ // Loop until
+ // there is a match.
+ // or until
+ // we've moved past the position that requires the
+ // lookbehind buffer. In this case we switch to the
+ // optimized loop.
+ // or until
+ // the character to look at lies outside the haystack.
+ while (pos < 0 && pos <= end) {
+ const nextPos = pos + lastNeedleCharPos;
+ const ch = (nextPos < 0
+ ? lookbehind[self._lookbehindSize + nextPos]
+ : data[nextPos]);
+
+ if (ch === lastNeedleChar
+ && matchNeedle(self, data, pos, lastNeedleCharPos)) {
+ self._lookbehindSize = 0;
+ ++self.matches;
+ if (pos > -self._lookbehindSize)
+ self._cb(true, lookbehind, 0, self._lookbehindSize + pos, false);
+ else
+ self._cb(true, undefined, 0, 0, true);
+
+ return (self._bufPos = pos + needleLen);
+ }
+
+ pos += occ[ch];
+ }
+
+ // No match.
+
+ // There's too few data for Boyer-Moore-Horspool to run,
+ // so let's use a different algorithm to skip as much as
+ // we can.
+ // Forward pos until
+ // the trailing part of lookbehind + data
+ // looks like the beginning of the needle
+ // or until
+ // pos == 0
+ while (pos < 0 && !matchNeedle(self, data, pos, len - pos))
+ ++pos;
+
+ if (pos < 0) {
+ // Cut off part of the lookbehind buffer that has
+ // been processed and append the entire haystack
+ // into it.
+ const bytesToCutOff = self._lookbehindSize + pos;
+
+ if (bytesToCutOff > 0) {
+ // The cut off data is guaranteed not to contain the needle.
+ self._cb(false, lookbehind, 0, bytesToCutOff, false);
+ }
+
+ self._lookbehindSize -= bytesToCutOff;
+ lookbehind.copy(lookbehind, 0, bytesToCutOff, self._lookbehindSize);
+ lookbehind.set(data, self._lookbehindSize);
+ self._lookbehindSize += len;
+
+ self._bufPos = len;
+ return len;
+ }
+
+ // Discard lookbehind buffer.
+ self._cb(false, lookbehind, 0, self._lookbehindSize, false);
+ self._lookbehindSize = 0;
+ }
+
+ pos += self._bufPos;
+
+ const firstNeedleChar = needle[0];
+
+ // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool
+ // search with optimized character lookup code that only considers
+ // the current round's haystack data.
+ while (pos <= end) {
+ const ch = data[pos + lastNeedleCharPos];
+
+ if (ch === lastNeedleChar
+ && data[pos] === firstNeedleChar
+ && memcmp(needle, 0, data, pos, lastNeedleCharPos)) {
+ ++self.matches;
+ if (pos > 0)
+ self._cb(true, data, self._bufPos, pos, true);
+ else
+ self._cb(true, undefined, 0, 0, true);
+
+ return (self._bufPos = pos + needleLen);
+ }
+
+ pos += occ[ch];
+ }
+
+ // There was no match. If there's trailing haystack data that we cannot
+ // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
+ // data is less than the needle size) then match using a modified
+ // algorithm that starts matching from the beginning instead of the end.
+ // Whatever trailing data is left after running this algorithm is added to
+ // the lookbehind buffer.
+ while (pos < len) {
+ if (data[pos] !== firstNeedleChar
+ || !memcmp(data, pos, needle, 0, len - pos)) {
+ ++pos;
+ continue;
+ }
+ data.copy(lookbehind, 0, pos, len);
+ self._lookbehindSize = len - pos;
+ break;
+ }
+
+ // Everything until `pos` is guaranteed not to contain needle data.
+ if (pos > 0)
+ self._cb(false, data, self._bufPos, pos < len ? pos : len, true);
+
+ self._bufPos = len;
+ return len;
+}
+
+function matchNeedle(self, data, pos, len) {
+ const lb = self._lookbehind;
+ const lbSize = self._lookbehindSize;
+ const needle = self._needle;
+
+ for (let i = 0; i < len; ++i, ++pos) {
+ const ch = (pos < 0 ? lb[lbSize + pos] : data[pos]);
+ if (ch !== needle[i])
+ return false;
+ }
+ return true;
+}
+
+module.exports = SBMH;
diff --git a/node_modules/streamsearch/package.json b/node_modules/streamsearch/package.json
new file mode 100644
index 0000000..51df8f9
--- /dev/null
+++ b/node_modules/streamsearch/package.json
@@ -0,0 +1,34 @@
+{
+ "name": "streamsearch",
+ "version": "1.1.0",
+ "author": "Brian White <mscdex@mscdex.net>",
+ "description": "Streaming Boyer-Moore-Horspool searching for node.js",
+ "main": "./lib/sbmh.js",
+ "engines": {
+ "node": ">=10.0.0"
+ },
+ "devDependencies": {
+ "@mscdex/eslint-config": "^1.1.0",
+ "eslint": "^7.32.0"
+ },
+ "scripts": {
+ "test": "node test/test.js",
+ "lint": "eslint --cache --report-unused-disable-directives --ext=.js .eslintrc.js lib test",
+ "lint:fix": "npm run lint -- --fix"
+ },
+ "keywords": [
+ "stream",
+ "horspool",
+ "boyer-moore-horspool",
+ "boyer-moore",
+ "search"
+ ],
+ "licenses": [{
+ "type": "MIT",
+ "url": "http://github.com/mscdex/streamsearch/raw/master/LICENSE"
+ }],
+ "repository": {
+ "type": "git",
+ "url": "http://github.com/mscdex/streamsearch.git"
+ }
+}
diff --git a/node_modules/streamsearch/test/test.js b/node_modules/streamsearch/test/test.js
new file mode 100644
index 0000000..39a04d7
--- /dev/null
+++ b/node_modules/streamsearch/test/test.js
@@ -0,0 +1,70 @@
+'use strict';
+
+const assert = require('assert');
+
+const StreamSearch = require('../lib/sbmh.js');
+
+[
+ {
+ needle: '\r\n',
+ chunks: [
+ 'foo',
+ ' bar',
+ '\r',
+ '\n',
+ 'baz, hello\r',
+ '\n world.',
+ '\r\n Node.JS rules!!\r\n\r\n',
+ ],
+ expect: [
+ [false, 'foo'],
+ [false, ' bar'],
+ [ true, null],
+ [false, 'baz, hello'],
+ [ true, null],
+ [false, ' world.'],
+ [ true, null],
+ [ true, ' Node.JS rules!!'],
+ [ true, ''],
+ ],
+ },
+ {
+ needle: '---foobarbaz',
+ chunks: [
+ '---foobarbaz',
+ 'asdf',
+ '\r\n',
+ '---foobarba',
+ '---foobar',
+ 'ba',
+ '\r\n---foobarbaz--\r\n',
+ ],
+ expect: [
+ [ true, null],
+ [false, 'asdf'],
+ [false, '\r\n'],
+ [false, '---foobarba'],
+ [false, '---foobarba'],
+ [ true, '\r\n'],
+ [false, '--\r\n'],
+ ],
+ },
+].forEach((test, i) => {
+ console.log(`Running test #${i + 1}`);
+ const { needle, chunks, expect } = test;
+
+ const results = [];
+ const ss = new StreamSearch(Buffer.from(needle),
+ (isMatch, data, start, end) => {
+ if (data)
+ data = data.toString('latin1', start, end);
+ else
+ data = null;
+ results.push([isMatch, data]);
+ });
+
+ for (const chunk of chunks)
+ ss.push(Buffer.from(chunk));
+
+ assert.deepStrictEqual(results, expect);
+});