Skip to content
This repository has been archived by the owner on Oct 21, 2022. It is now read-only.

Commit

Permalink
adds a cheerio parser for HTML and XML body content
Browse files Browse the repository at this point in the history
  • Loading branch information
santiagogak committed Mar 1, 2018
1 parent c3634f2 commit 88129a7
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 2 deletions.
44 changes: 44 additions & 0 deletions lib/Adaptor.js
Expand Up @@ -14,6 +14,7 @@ exports.post = post;
exports.put = put;
exports.patch = patch;
exports.del = del;
exports.parse = parse;

var _languageCommon = require('language-common');

Expand Down Expand Up @@ -76,6 +77,16 @@ var _Client = require('./Client');

var _Utils = require('./Utils');

var _cheerio = require('cheerio');

var _cheerio2 = _interopRequireDefault(_cheerio);

var _cheerioTableparser = require('cheerio-tableparser');

var _cheerioTableparser2 = _interopRequireDefault(_cheerioTableparser);

/**
 * Module interop helper: hands back `obj` untouched when it is truthy and
 * carries a truthy `__esModule` property; otherwise wraps it as
 * `{ default: obj }` so callers can always read `.default`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}

/**
 * Builds a shallow copy of `obj` that omits every property named in `keys`.
 * Only own enumerable properties are copied; inherited ones are skipped.
 */
function _objectWithoutProperties(obj, keys) {
  var target = {};
  for (var prop in obj) {
    var isExcluded = keys.indexOf(prop) >= 0;
    if (!isExcluded && Object.prototype.hasOwnProperty.call(obj, prop)) {
      target[prop] = obj[prop];
    }
  }
  return target;
}

/**
Expand Down Expand Up @@ -313,3 +324,36 @@ function del(path, params, callback) {
});
};
}

/**
 * Cheerio parser for XML and HTML.
 *
 * Loads `body` with cheerio, applies cheerio-tableparser to the resulting `$`
 * (the example below relies on this for `.parsetable`), and passes `$` to
 * `script`. The value handed to `composeNextState` is chosen as follows:
 *  - no `script` given: `{ body: body }`;
 *  - `script` returns a string that is valid JSON: the decoded JSON value;
 *  - `script` returns anything else: `{ body: result }`.
 *
 * @public
 * @example
 * parse(body, function($){
 *   return $("table[class=your_table]").parsetable(true, true, true);
 * })
 * @function
 * @param {String} body - data string to be parsed
 * @param {function} script - script for extracting data
 * @returns {Operation}
 */
function parse(body, script) {
  return function (state) {
    var $ = _cheerio2.default.load(body);
    (0, _cheerioTableparser2.default)($);

    if (!script) {
      return (0, _languageCommon.composeNextState)(state, { body: body });
    }

    var result = script($);

    // JSON.parse coerces its argument to a string, so a non-string result
    // such as [["42"]] (a one-cell table) would be "decoded" into the number
    // 42. Only genuine strings are candidates for JSON decoding.
    if (typeof result !== 'string') {
      return (0, _languageCommon.composeNextState)(state, { body: result });
    }

    // Keep the try limited to decoding so that an error thrown while
    // composing the next state is not mistaken for invalid JSON.
    var parsed;
    try {
      parsed = JSON.parse(result);
    } catch (e) {
      // Not JSON: pass the raw string through unchanged.
      return (0, _languageCommon.composeNextState)(state, { body: result });
    }

    return (0, _languageCommon.composeNextState)(state, parsed);
  };
}
6 changes: 4 additions & 2 deletions package.json
@@ -1,6 +1,6 @@
{
"name": "language-http",
"version": "2.0.0",
"version": "2.1.0",
"description": "An HTTP request language package for use with Open Function",
"main": "lib/index.js",
"scripts": {
Expand All @@ -15,7 +15,9 @@
],
"dependencies": {
"language-common": "github:openfn/language-common#v0.1.1",
"request": "^2.72.0"
"request": "^2.72.0",
"cheerio": "1.0.0-rc.2",
"cheerio-tableparser": "1.0.1"
},
"devDependencies": {
"assertion-error": "^1.0.1",
Expand Down
36 changes: 36 additions & 0 deletions src/Adaptor.js
Expand Up @@ -6,6 +6,8 @@ import {
expandReferences,
composeNextState
} from 'language-common';
import cheerio from 'cheerio';
import cheerioTableparser from 'cheerio-tableparser';

/**
* Execute a sequence of operations.
Expand Down Expand Up @@ -226,6 +228,40 @@ export function del(path, params, callback) {
}
}


/**
 * Cheerio parser for XML and HTML.
 *
 * Loads `body` with cheerio, applies cheerio-tableparser to the resulting `$`
 * (the example below relies on this for `.parsetable`), and passes `$` to
 * `script`. The value handed to `composeNextState` is chosen as follows:
 *  - no `script` given: `{body: body}`;
 *  - `script` returns a string that is valid JSON: the decoded JSON value;
 *  - `script` returns anything else: `{body: result}`.
 *
 * @public
 * @example
 * parse(body, function($){
 *   return $("table[class=your_table]").parsetable(true, true, true);
 * })
 * @function
 * @param {String} body - data string to be parsed
 * @param {function} script - script for extracting data
 * @returns {Operation}
 */
export function parse(body, script) {
  return state => {
    const $ = cheerio.load(body);
    cheerioTableparser($);

    if (!script) {
      return composeNextState(state, {body: body});
    }

    const result = script($);

    // JSON.parse coerces its argument to a string, so a non-string result
    // such as [["42"]] (a one-cell table) would be "decoded" into the number
    // 42. Only genuine strings are candidates for JSON decoding.
    if (typeof result !== 'string') {
      return composeNextState(state, {body: result});
    }

    // Keep the try limited to decoding so that an error thrown while
    // composing the next state is not mistaken for invalid JSON.
    let parsed;
    try {
      parsed = JSON.parse(result);
    } catch (e) {
      // Not JSON: pass the raw string through unchanged.
      return composeNextState(state, {body: result});
    }

    return composeNextState(state, parsed);
  };
}

export {
alterState,
dataPath,
Expand Down

0 comments on commit 88129a7

Please sign in to comment.