Scripted Rest Endpoint w/ Concurrent Import Sets
It has been a while since I posted about importing a million records daily, and I realize that post is now out of date and not as efficient as it could be.
Note: This is pre-Quebec, because in Quebec there are REST Triggers in Flow Designer (so I may write up a different version for that).
So I am just going to dive into some changes from my original post:
- Make the Scripted REST endpoint as “dumb” as possible. Essentially it just spins up a progress worker and checks that the payload is formatted correctly (an example payload is shown below).
- Create a script include that does all the work and gets called by the progress worker.
- Create temporary data sources and temporary concurrent import sets to handle the speed.
By the end of this, it should take roughly 20 minutes to process a million records (if set up correctly).
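For reference, the endpoint below only checks that the body contains a non-empty data_payload array, so a request shaped like the following is what it expects (the fields inside each record are hypothetical placeholders for whatever your staging table maps):
{
    "data_payload": [
        { "u_field_one": "value 1", "u_field_two": "value 2" },
        { "u_field_one": "value 3", "u_field_two": "value 4" }
    ]
}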
Step 1: Making the Scripted Endpoint “Dumb”
(function process( /*RESTAPIRequest*/ request, /*RESTAPIResponse*/ response) {
    var request_body = request.body.data;
    // Validate the payload shape before doing any work
    if (request_body && request_body.data_payload && request_body.data_payload.length > 0) {
        // Hand everything off to a background progress worker
        var worker = new GlideScriptedHierarchicalWorker();
        var now = new Date().getTime().toString();
        worker.setProgressName('Import Job Matching Data ' + now);
        worker.setBackground(true);
        worker.setScriptIncludeName('global.ScriptIncludeName');
        worker.setScriptIncludeMethod('runAsync');
        worker.putMethodArg('requestBody', request_body);
        worker.start();
        response.setStatus(201);
    } else {
        response.setStatus(400);
    }
})(request, response);
So at the core:
- The if statement checks the payload to make sure it is formed correctly.
- GlideScriptedHierarchicalWorker creates the background worker.
- setScriptIncludeName points the worker at the script include.
- setScriptIncludeMethod names the function to call within that script include.
- putMethodArg passes the request body to the script include.
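If you want to sanity-check that handoff before going end to end, a minimal background-script sketch is to call the script include method directly with a small payload (the field names here are hypothetical placeholders):
// Background script sketch: call the script include synchronously with a
// small payload and confirm it creates the temporary data source and
// scheduled import. Field names are hypothetical placeholders.
var testBody = {
    data_payload: [
        { u_field_one: 'value 1', u_field_two: 'value 2' }
    ]
};
new global.ScriptIncludeName().runAsync(testBody);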
Steps 2 and 3: Concurrent Import Sets
The script magic (it should be self-explanatory, but leave a comment if you need me to add more info):
var ScriptIncludeName = Class.create();
ScriptIncludeName.prototype = {
    initialize: function() {},

    runAsync: function(requestBody) {
        /* Create temporary data source and concurrent scheduled import */
        var data_source = this.newDataSourceJSON('<staging table>', '<data_payload/data_payload>');
        var sh_import = this.newScheduledConcurrentImport(data_source, '<user to run this as sys id>');

        /* Save the payload as an attachment on the data source */
        this.writeAttachment(data_source, requestBody);

        /* Run the import */
        // Nothing to do here: the scheduled job created above fires on its
        // own shortly after creation (see newScheduledConcurrentImport)
    },
    writeAttachment: function(gr_sysid, request_body) {
        var grDS = new GlideRecord('sys_data_source');
        if (grDS.get(gr_sysid)) {
            // Attach the raw JSON payload so the file-type data source can read it
            var attachment = new GlideSysAttachment();
            var now = new Date().getTime().toString();
            attachment.write(grDS, "data_" + now + ".json", "application/json", JSON.stringify(request_body));
        }
    },
    cleanUpData: function(table_name1, sysid1, table_name2, sysid2) {
        // Clean up the temporary data source and its attachment
        var grDS = new GlideRecord(table_name1);
        if (grDS.get(sysid1)) {
            var p_attachment = new GlideSysAttachment();
            var currentAttachments = p_attachment.getAttachments(table_name1, sysid1);
            if (currentAttachments.hasNext()) {
                currentAttachments.deleteMultiple();
            }
            grDS.deleteRecord();
        }
        // Clean up the temporary scheduled import
        var grSJ = new GlideRecord(table_name2);
        if (grSJ.get(sysid2)) {
            grSJ.deleteRecord();
        }
    },
    newDataSourceJSON: function(import_set_table_name, root_node) {
        var grDS = new GlideRecord('sys_data_source');
        grDS.initialize();
        var now = new Date().getTime().toString();
        grDS.name = 'Temp Job Matching Data JSON ' + now;
        grDS.import_set_table_name = import_set_table_name;
        grDS.type = 'File';
        grDS.format = 'JSON';
        grDS.jpath_root_node = root_node;
        //grDS.discard_arrays = true;
        grDS.file_retrieval_method = 'Attachment';
        var DS_RECORD_SYSID = grDS.insert();
        return DS_RECORD_SYSID;
    },
    newScheduledConcurrentImport: function(data_source_sys_id, user_sys_id) {
        var grSCI = new GlideRecord('scheduled_import_set');
        // Generate the sys_id up front so the post script can reference it
        var newGUID = gs.generateGUID();
        grSCI.initialize();
        grSCI.setNewGuidValue(newGUID);
        var now = new Date().getTime().toString();
        grSCI.name = 'Temp Scheduled Import ' + now;
        grSCI.data_source = data_source_sys_id;
        grSCI.run_as = user_sys_id;
        grSCI.active = true;
        grSCI.run_type = 'periodically';
        grSCI.run_period = new GlideDuration(36000000); // 10 hours
        var run_start = new GlideDateTime();
        var run_start_future = new GlideDateTime(run_start.getValue());
        // Back-date the start by 9h 59m so the next 10-hour run lands
        // about 60 seconds from now (36000000 - 35940000 = 60000 ms)
        run_start_future.add(-35940000);
        grSCI.run_start = run_start_future;
        grSCI.concurrent_import = true;
        // Once the import finishes, the post script deletes the temporary records
        grSCI.post_script_bool = true;
        var post_script = "new global.ScriptIncludeName().cleanUpData('sys_data_source','" + data_source_sys_id + "','scheduled_import_set','" + newGUID + "')";
        grSCI.post_script = post_script;
        grSCI.insert();
        return newGUID;
    },

    type: 'ScriptIncludeName'
};
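And to test the whole thing end to end, here is a minimal sketch using RESTMessageV2 from a background script; the endpoint path, credentials, and field names are all placeholders for your own:
// End-to-end test sketch; endpoint path and credentials are placeholders
var rm = new sn_ws.RESTMessageV2();
rm.setEndpoint('https://<instance>.service-now.com/api/<namespace>/<api_path>');
rm.setHttpMethod('post');
rm.setBasicAuth('<user>', '<password>');
rm.setRequestHeader('Content-Type', 'application/json');
rm.setRequestBody(JSON.stringify({
    data_payload: [
        { u_field_one: 'value 1', u_field_two: 'value 2' }
    ]
}));
var response = rm.execute();
gs.info('Status: ' + response.getStatusCode()); // expect 201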
And there you have it! An awesome, reusable Scripted REST endpoint to import millions of records daily.