diff options
author | sowgro <tpoke.ferrari@gmail.com> | 2023-09-02 19:12:47 -0400 |
---|---|---|
committer | sowgro <tpoke.ferrari@gmail.com> | 2023-09-02 19:12:47 -0400 |
commit | e4450c8417624b71d779cb4f41692538f9165e10 (patch) | |
tree | b70826542223ecdf8a7a259f61b0a1abb8a217d8 /node_modules/streamsearch | |
download | sowbot3-e4450c8417624b71d779cb4f41692538f9165e10.tar.gz sowbot3-e4450c8417624b71d779cb4f41692538f9165e10.tar.bz2 sowbot3-e4450c8417624b71d779cb4f41692538f9165e10.zip |
first commit
Diffstat (limited to 'node_modules/streamsearch')
-rw-r--r-- | node_modules/streamsearch/.eslintrc.js | 5 | ||||
-rw-r--r-- | node_modules/streamsearch/.github/workflows/ci.yml | 24 | ||||
-rw-r--r-- | node_modules/streamsearch/.github/workflows/lint.yml | 23 | ||||
-rw-r--r-- | node_modules/streamsearch/LICENSE | 19 | ||||
-rw-r--r-- | node_modules/streamsearch/README.md | 95 | ||||
-rw-r--r-- | node_modules/streamsearch/lib/sbmh.js | 267 | ||||
-rw-r--r-- | node_modules/streamsearch/package.json | 34 | ||||
-rw-r--r-- | node_modules/streamsearch/test/test.js | 70 |
8 files changed, 537 insertions, 0 deletions
diff --git a/node_modules/streamsearch/.eslintrc.js b/node_modules/streamsearch/.eslintrc.js new file mode 100644 index 0000000..be9311d --- /dev/null +++ b/node_modules/streamsearch/.eslintrc.js @@ -0,0 +1,5 @@ +'use strict'; + +module.exports = { + extends: '@mscdex/eslint-config', +}; diff --git a/node_modules/streamsearch/.github/workflows/ci.yml b/node_modules/streamsearch/.github/workflows/ci.yml new file mode 100644 index 0000000..29d5178 --- /dev/null +++ b/node_modules/streamsearch/.github/workflows/ci.yml @@ -0,0 +1,24 @@ +name: CI + +on: + pull_request: + push: + branches: [ master ] + +jobs: + tests-linux: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + node-version: [10.x, 12.x, 14.x, 16.x] + steps: + - uses: actions/checkout@v2 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v1 + with: + node-version: ${{ matrix.node-version }} + - name: Install module + run: npm install + - name: Run tests + run: npm test diff --git a/node_modules/streamsearch/.github/workflows/lint.yml b/node_modules/streamsearch/.github/workflows/lint.yml new file mode 100644 index 0000000..9f9e1f5 --- /dev/null +++ b/node_modules/streamsearch/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: lint + +on: + pull_request: + push: + branches: [ master ] + +env: + NODE_VERSION: 16.x + +jobs: + lint-js: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Use Node.js ${{ env.NODE_VERSION }} + uses: actions/setup-node@v1 + with: + node-version: ${{ env.NODE_VERSION }} + - name: Install ESLint + ESLint configs/plugins + run: npm install --only=dev + - name: Lint files + run: npm run lint diff --git a/node_modules/streamsearch/LICENSE b/node_modules/streamsearch/LICENSE new file mode 100644 index 0000000..9ea90e0 --- /dev/null +++ b/node_modules/streamsearch/LICENSE @@ -0,0 +1,19 @@ +Copyright Brian White. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
\ No newline at end of file diff --git a/node_modules/streamsearch/README.md b/node_modules/streamsearch/README.md new file mode 100644 index 0000000..c3934d1 --- /dev/null +++ b/node_modules/streamsearch/README.md @@ -0,0 +1,95 @@ +Description +=========== + +streamsearch is a module for [node.js](http://nodejs.org/) that allows searching a stream using the Boyer-Moore-Horspool algorithm. + +This module is based heavily on the Streaming Boyer-Moore-Horspool C++ implementation by Hongli Lai [here](https://github.com/FooBarWidget/boyer-moore-horspool). + + +Requirements +============ + +* [node.js](http://nodejs.org/) -- v10.0.0 or newer + + +Installation +============ + + npm install streamsearch + +Example +======= + +```js + const { inspect } = require('util'); + + const StreamSearch = require('streamsearch'); + + const needle = Buffer.from('\r\n'); + const ss = new StreamSearch(needle, (isMatch, data, start, end) => { + if (data) + console.log('data: ' + inspect(data.toString('latin1', start, end))); + if (isMatch) + console.log('match!'); + }); + + const chunks = [ + 'foo', + ' bar', + '\r', + '\n', + 'baz, hello\r', + '\n world.', + '\r\n Node.JS rules!!\r\n\r\n', + ]; + for (const chunk of chunks) + ss.push(Buffer.from(chunk)); + + // output: + // + // data: 'foo' + // data: ' bar' + // match! + // data: 'baz, hello' + // match! + // data: ' world.' + // match! + // data: ' Node.JS rules!!' + // match! + // data: '' + // match! +``` + + +API +=== + +Properties +---------- + +* **maxMatches** - < _integer_ > - The maximum number of matches. Defaults to `Infinity`. + +* **matches** - < _integer_ > - The current match count. + + +Functions +--------- + +* **(constructor)**(< _mixed_ >needle, < _function_ >callback) - Creates and returns a new instance for searching for a _Buffer_ or _string_ `needle`. `callback` is called any time there is non-matching data and/or there is a needle match. `callback` will be called with the following arguments: + + 1. `isMatch` - _boolean_ - Indicates whether a match has been found + + 2. `data` - _mixed_ - If set, this contains data that did not match the needle. + + 3. `start` - _integer_ - The index in `data` where the non-matching data begins (inclusive). + + 4. `end` - _integer_ - The index in `data` where the non-matching data ends (exclusive). + + 5. `isSafeData` - _boolean_ - Indicates if it is safe to store a reference to `data` (e.g. as-is or via `data.slice()`) or not, as in some cases `data` may point to a Buffer whose contents change over time. + +* **destroy**() - _(void)_ - Emits any last remaining unmatched data that may still be buffered and then resets internal state. + +* **push**(< _Buffer_ >chunk) - _integer_ - Processes `chunk`, searching for a match. The return value is the last processed index in `chunk` + 1. + +* **reset**() - _(void)_ - Resets internal state. Useful for when you wish to start searching a new/different stream for example. + diff --git a/node_modules/streamsearch/lib/sbmh.js b/node_modules/streamsearch/lib/sbmh.js new file mode 100644 index 0000000..510cae2 --- /dev/null +++ b/node_modules/streamsearch/lib/sbmh.js @@ -0,0 +1,267 @@ +'use strict'; +/* + Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation + by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool +*/ +function memcmp(buf1, pos1, buf2, pos2, num) { + for (let i = 0; i < num; ++i) { + if (buf1[pos1 + i] !== buf2[pos2 + i]) + return false; + } + return true; +} + +class SBMH { + constructor(needle, cb) { + if (typeof cb !== 'function') + throw new Error('Missing match callback'); + + if (typeof needle === 'string') + needle = Buffer.from(needle); + else if (!Buffer.isBuffer(needle)) + throw new Error(`Expected Buffer for needle, got ${typeof needle}`); + + const needleLen = needle.length; + + this.maxMatches = Infinity; + this.matches = 0; + + this._cb = cb; + this._lookbehindSize = 0; + this._needle = needle; + this._bufPos = 0; + + this._lookbehind = Buffer.allocUnsafe(needleLen); + + // Initialize occurrence table. + this._occ = [ + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen, needleLen, needleLen, + needleLen, needleLen, needleLen, needleLen + ]; + + // Populate occurrence table with analysis of the needle, ignoring the last + // letter. + if (needleLen > 1) { + for (let i = 0; i < needleLen - 1; ++i) + this._occ[needle[i]] = needleLen - 1 - i; + } + } + + reset() { + this.matches = 0; + this._lookbehindSize = 0; + this._bufPos = 0; + } + + push(chunk, pos) { + let result; + if (!Buffer.isBuffer(chunk)) + chunk = Buffer.from(chunk, 'latin1'); + const chunkLen = chunk.length; + this._bufPos = pos || 0; + while (result !== chunkLen && this.matches < this.maxMatches) + result = feed(this, chunk); + return result; + } + + destroy() { + const lbSize = this._lookbehindSize; + if (lbSize) + this._cb(false, this._lookbehind, 0, lbSize, false); + this.reset(); + } +} + +function feed(self, data) { + const len = data.length; + const needle = self._needle; + const needleLen = needle.length; + + // Positive: points to a position in `data` + // pos == 3 points to data[3] + // Negative: points to a position in the lookbehind buffer + // pos == -2 points to lookbehind[lookbehindSize - 2] + let pos = -self._lookbehindSize; + const lastNeedleCharPos = needleLen - 1; + const lastNeedleChar = needle[lastNeedleCharPos]; + const end = len - needleLen; + const occ = self._occ; + const lookbehind = self._lookbehind; + + if (pos < 0) { + // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool + // search with character lookup code that considers both the + // lookbehind buffer and the current round's haystack data. + // + // Loop until + // there is a match. + // or until + // we've moved past the position that requires the + // lookbehind buffer. In this case we switch to the + // optimized loop. + // or until + // the character to look at lies outside the haystack. + while (pos < 0 && pos <= end) { + const nextPos = pos + lastNeedleCharPos; + const ch = (nextPos < 0 + ? lookbehind[self._lookbehindSize + nextPos] + : data[nextPos]); + + if (ch === lastNeedleChar + && matchNeedle(self, data, pos, lastNeedleCharPos)) { + self._lookbehindSize = 0; + ++self.matches; + if (pos > -self._lookbehindSize) + self._cb(true, lookbehind, 0, self._lookbehindSize + pos, false); + else + self._cb(true, undefined, 0, 0, true); + + return (self._bufPos = pos + needleLen); + } + + pos += occ[ch]; + } + + // No match. + + // There's too few data for Boyer-Moore-Horspool to run, + // so let's use a different algorithm to skip as much as + // we can. + // Forward pos until + // the trailing part of lookbehind + data + // looks like the beginning of the needle + // or until + // pos == 0 + while (pos < 0 && !matchNeedle(self, data, pos, len - pos)) + ++pos; + + if (pos < 0) { + // Cut off part of the lookbehind buffer that has + // been processed and append the entire haystack + // into it. + const bytesToCutOff = self._lookbehindSize + pos; + + if (bytesToCutOff > 0) { + // The cut off data is guaranteed not to contain the needle. + self._cb(false, lookbehind, 0, bytesToCutOff, false); + } + + self._lookbehindSize -= bytesToCutOff; + lookbehind.copy(lookbehind, 0, bytesToCutOff, self._lookbehindSize); + lookbehind.set(data, self._lookbehindSize); + self._lookbehindSize += len; + + self._bufPos = len; + return len; + } + + // Discard lookbehind buffer. + self._cb(false, lookbehind, 0, self._lookbehindSize, false); + self._lookbehindSize = 0; + } + + pos += self._bufPos; + + const firstNeedleChar = needle[0]; + + // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool + // search with optimized character lookup code that only considers + // the current round's haystack data. + while (pos <= end) { + const ch = data[pos + lastNeedleCharPos]; + + if (ch === lastNeedleChar + && data[pos] === firstNeedleChar + && memcmp(needle, 0, data, pos, lastNeedleCharPos)) { + ++self.matches; + if (pos > 0) + self._cb(true, data, self._bufPos, pos, true); + else + self._cb(true, undefined, 0, 0, true); + + return (self._bufPos = pos + needleLen); + } + + pos += occ[ch]; + } + + // There was no match. If there's trailing haystack data that we cannot + // match yet using the Boyer-Moore-Horspool algorithm (because the trailing + // data is less than the needle size) then match using a modified + // algorithm that starts matching from the beginning instead of the end. + // Whatever trailing data is left after running this algorithm is added to + // the lookbehind buffer. + while (pos < len) { + if (data[pos] !== firstNeedleChar + || !memcmp(data, pos, needle, 0, len - pos)) { + ++pos; + continue; + } + data.copy(lookbehind, 0, pos, len); + self._lookbehindSize = len - pos; + break; + } + + // Everything until `pos` is guaranteed not to contain needle data. + if (pos > 0) + self._cb(false, data, self._bufPos, pos < len ? pos : len, true); + + self._bufPos = len; + return len; +} + +function matchNeedle(self, data, pos, len) { + const lb = self._lookbehind; + const lbSize = self._lookbehindSize; + const needle = self._needle; + + for (let i = 0; i < len; ++i, ++pos) { + const ch = (pos < 0 ? lb[lbSize + pos] : data[pos]); + if (ch !== needle[i]) + return false; + } + return true; +} + +module.exports = SBMH; diff --git a/node_modules/streamsearch/package.json b/node_modules/streamsearch/package.json new file mode 100644 index 0000000..51df8f9 --- /dev/null +++ b/node_modules/streamsearch/package.json @@ -0,0 +1,34 @@ +{ + "name": "streamsearch", + "version": "1.1.0", + "author": "Brian White <mscdex@mscdex.net>", + "description": "Streaming Boyer-Moore-Horspool searching for node.js", + "main": "./lib/sbmh.js", + "engines": { + "node": ">=10.0.0" + }, + "devDependencies": { + "@mscdex/eslint-config": "^1.1.0", + "eslint": "^7.32.0" + }, + "scripts": { + "test": "node test/test.js", + "lint": "eslint --cache --report-unused-disable-directives --ext=.js .eslintrc.js lib test", + "lint:fix": "npm run lint -- --fix" + }, + "keywords": [ + "stream", + "horspool", + "boyer-moore-horspool", + "boyer-moore", + "search" + ], + "licenses": [{ + "type": "MIT", + "url": "http://github.com/mscdex/streamsearch/raw/master/LICENSE" + }], + "repository": { + "type": "git", + "url": "http://github.com/mscdex/streamsearch.git" + } +} diff --git a/node_modules/streamsearch/test/test.js b/node_modules/streamsearch/test/test.js new file mode 100644 index 0000000..39a04d7 --- /dev/null +++ b/node_modules/streamsearch/test/test.js @@ -0,0 +1,70 @@ +'use strict'; + +const assert = require('assert'); + +const StreamSearch = require('../lib/sbmh.js'); + +[ + { + needle: '\r\n', + chunks: [ + 'foo', + ' bar', + '\r', + '\n', + 'baz, hello\r', + '\n world.', + '\r\n Node.JS rules!!\r\n\r\n', + ], + expect: [ + [false, 'foo'], + [false, ' bar'], + [ true, null], + [false, 'baz, hello'], + [ true, null], + [false, ' world.'], + [ true, null], + [ true, ' Node.JS rules!!'], + [ true, ''], + ], + }, + { + needle: '---foobarbaz', + chunks: [ + '---foobarbaz', + 'asdf', + '\r\n', + '---foobarba', + '---foobar', + 'ba', + '\r\n---foobarbaz--\r\n', + ], + expect: [ + [ true, null], + [false, 'asdf'], + [false, '\r\n'], + [false, '---foobarba'], + [false, '---foobarba'], + [ true, '\r\n'], + [false, '--\r\n'], + ], + }, +].forEach((test, i) => { + console.log(`Running test #${i + 1}`); + const { needle, chunks, expect } = test; + + const results = []; + const ss = new StreamSearch(Buffer.from(needle), + (isMatch, data, start, end) => { + if (data) + data = data.toString('latin1', start, end); + else + data = null; + results.push([isMatch, data]); + }); + + for (const chunk of chunks) + ss.push(Buffer.from(chunk)); + + assert.deepStrictEqual(results, expect); +}); |