diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c3629e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +node_modules diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..f0418c1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,8 @@ +language: node_js +node_js: +- '0.12' +- '0.10' +- '0.8' +- 'iojs' +before_install: +- npm install -g npm@~1.4.6 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7cdc145 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Thomas Watson Steen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..ea19f07 --- /dev/null +++ b/README.md @@ -0,0 +1,107 @@ +# cheerio-advanced-selectors + +Add advanced selector support to cheerio + +[![Build status](https://travis-ci.org/watson/cheerio-advanced-selectors.svg?branch=master)](https://travis-ci.org/watson/cheerio-advanced-selectors) +[![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat)](https://github.com/feross/standard) + +This module is inspired by +[cheerio-eq](https://github.com/watson/cheerio-eq) with the added +support for many different selectors. + +## Installation + +``` +npm install cheerio-advanced-selectors +``` + +## Usage + +Use the `.wrap()` function to make cheerio-advanced-selectors take care +of everything for you: + +```js +var cheerio = require('cheerio') +var cheerioAdv = require('cheerio-advanced-selectors') + +cheerio = cheerioAdv.wrap(cheerio) + +var $ = cheerio.load('
<div>foo</div><div>bar</div>
') + +$('div:first').text() // => 'foo' +``` + +Note that this will only work if the HTML is loaded using the `.load()` +function as seen above. + +Alternatively use the `.find()` function to only use +cheerio-advanced-selectors for a specific selector: + +```js +var cheerio = require('cheerio') +var cheerioAdv = require('cheerio-advanced-selectors') + +var $ = cheerio.load('
<div>foo</div><div>bar</div>
') + +cheerioAdv.find($, 'div:eq(1)').text() // => 'bar' +``` + +If you need to run the same selector on a lot of different HTML +documents, you can speed things up by pre-compiling the selector using +the `.compile()` function: + +```js +var cheerio = require('cheerio') +var cheerioAdv = require('cheerio-advanced-selectors') + +var myH1 = cheerioAdv.compile('div:first span:eq(1) h1') + +var html1 = cheerio.load('

<div><span>foo1</span><span><h1>bar1</h1></span></div>

') +var html2 = cheerio.load('

<div><span>foo2</span><span><h1>bar2</h1></span></div>

') + +myH1(html1).text() // => 'bar1' +myH1(html2).text() // => 'bar2' +``` + +## Supported advanced selectors + +This module currently only support a minimal subset of the possible +advanced selectors: + +- `:first` +- `:last` +- `:eq(index)` + +But don't fear :) It's easy to add support for other selectors. Just +[open an +issue](https://github.com/watson/cheerio-advanced-selectors/issues) or +make a pull request. + +## API + +#### `.wrap(cheerio)` + +Wraps the main cheerio module to overload the standard `load` function +so it knows how to handle the advanced selectors. + +Returns the `cheerio` module. + +#### `.find(cheerio, selector [, context [, root]])` + +Run the `selector` on the given cheerio object optionally within the +given `context` and optionally on the given `root`. + +The `cheerio` object is usually called `$`. + +#### `.compile(selector)` + +Compiles the `selector` and returns a function which take 3 arguments: +`fn(cheerio [, context [, root]])`: + +- `cheerio` - a reference to the cheerio object (usually called `$`) +- `context` - the context in which to run the selector (optional) +- `root` - the HTML root on which to run the selector (optional) + +## License + +MIT diff --git a/index.js b/index.js new file mode 100644 index 0000000..98575d4 --- /dev/null +++ b/index.js @@ -0,0 +1,61 @@ +'use strict' + +var splitter = /^(.*?)(?:\:(eq|first|last)(?:\((\d+)\))?)(.*)/ + +exports.wrap = function (cheerio) { + var load = cheerio.load + + cheerio.load = function () { + var $ = load.apply(cheerio, arguments) + return exports.find.bind(null, $) + } + + return cheerio +} + +exports.find = function ($, selector, context, root) { + return exports.compile(selector)($, context, root) +} + +exports.compile = function (selector) { + var parts = [] + var match = selector.match(splitter) + + while (match) { + parts.push(match[1]) + parts.push(exports['_' + match[2]](match[3])) + selector = match[4].trim() + match = selector.match(splitter) + } + 
parts.push(selector) + selector = parts.shift() + + parts = parts.filter(function (part) { + return part !== '' + }) + + return function ($, context, root) { + return parts.reduce(function (cursor, part) { + return typeof part === 'function' ? part(cursor) : cursor.find(part) + }, $(selector, context, root)) + } +} + +exports._eq = function (index) { + index = parseInt(index, 10) + return function (cursor) { + return cursor.eq(index) + } +} + +exports._first = function () { + return function (cursor) { + return cursor.first() + } +} + +exports._last = function () { + return function (cursor) { + return cursor.last() + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..a79b198 --- /dev/null +++ b/package.json @@ -0,0 +1,36 @@ +{ + "name": "cheerio-advanced-selectors", + "version": "0.0.0", + "description": "Add advanced selector support to cheerio", + "main": "index.js", + "scripts": { + "test": "standard && tape test.js" + }, + "repository": { + "type": "git", + "url": "https://github.com/watson/cheerio-advanced-selectors.git" + }, + "devDependencies": { + "cheerio": "^0.19.0", + "standard": "^4.4.1", + "tape": "^4.0.0" + }, + "keywords": [ + "cheerio", + "jquery", + "query", + "queries", + "selector", + "selectors", + "find", + "last", + "first", + "eq" + ], + "author": "Thomas Watson Steen (https://twitter.com/wa7son)", + "license": "MIT", + "bugs": { + "url": "https://github.com/watson/cheerio-advanced-selectors/issues" + }, + "homepage": "https://github.com/watson/cheerio-advanced-selectors" +} diff --git a/test.js b/test.js new file mode 100644 index 0000000..7307db9 --- /dev/null +++ b/test.js @@ -0,0 +1,107 @@ +'use strict' + +var test = require('tape') +var cheerio = require('cheerio') +var cheerioAdv = require('./') + +test('#compile()', function (t) { + t.test('Non-advanced selector', function (t) { + var compiled = cheerioAdv.compile('div') + var html, $ + + html = '
foo1
bar1
' + $ = cheerio.load(html) + t.equal(compiled($).text(), 'foo1bar1') + + html = '
foo2
bar2
' + $ = cheerio.load(html) + t.equal(compiled($).text(), 'foo2bar2') + + t.end() + }) + + t.test('Simple selector ending in :first()', function (t) { + var compiled = cheerioAdv.compile('div:first') + var html, $ + + html = '
foo1
bar1
' + $ = cheerio.load(html) + t.equal(compiled($).text(), 'foo1') + + html = '
foo2
bar2
' + $ = cheerio.load(html) + t.equal(compiled($).text(), 'foo2') + + t.end() + }) + + t.test('Custom context', function (t) { + var compiled = cheerioAdv.compile('div:eq(1)') + var html + + html = '
foo1
bar1
' + t.equal(compiled(cheerio, html).text(), 'bar1') + + html = '
foo2
bar2
' + t.equal(compiled(cheerio, html).text(), 'bar2') + + t.end() + }) + + t.test('Custom root', function (t) { + var compiled = cheerioAdv.compile('span:eq(1)') + var html + + html = '
foo1
bar1
' + t.equal(compiled(cheerio, 'div', html).text(), 'bar1') + + html = '
foo2
bar2
' + t.equal(compiled(cheerio, 'div', html).text(), 'bar2') + + t.end() + }) +}) + +var testCases = [ + ['Non-advanced selector', '
foo
bar
', 'div', 'foobar'], + ['Simple selector ending in :first()', '
foo
bar
', 'div:first', 'foo'], + ['Simple selector ending in :last()', '
foo
bar
', 'div:last', 'bar'], + ['Simple selector ending in :eq()', '
foo
bar
', 'div:eq(1)', 'bar'], + ['Simple selector with :eq() in the middle', '
foo
bar
', 'div:eq(0) span', 'foo'], + ['Complex selector', '

foo

bar

', 'div:first span:eq(1) h1', 'bar'] +] + +test('#find()', function (t) { + testCases.forEach(function (testCase) { + t.test(testCase[0], function (t) { + var $ = cheerio.load(testCase[1]) + t.equal(cheerioAdv.find($, testCase[2]).text(), testCase[3]) + t.end() + }) + }) + + t.test('Custom context', function (t) { + var html = '
foo
bar
' + t.equal(cheerioAdv.find(cheerio, 'div:eq(1)', html).text(), 'bar') + t.end() + }) + + t.test('Custom root', function (t) { + var html = '
foo
bar
' + t.equal(cheerioAdv.find(cheerio, 'span:eq(1)', 'div', html).text(), 'bar') + t.end() + }) +}) + +test('#wrap()', function (t) { + testCases.forEach(function (testCase) { + t.test(testCase[0], function (t) { + var load = cheerio.load + var wrapped = cheerioAdv.wrap(cheerio) + var $ = wrapped.load(testCase[1]) + t.equal($(testCase[2]).text(), testCase[3]) + cheerio.load = load + t.end() + }) + }) +})