Importing a million records daily in ServiceNow Part 2 (Pre-Quebec)

Published On: 2021-02-05, Reading Time: 3 minutes

Scripted REST Endpoint w/ Concurrent Import Sets

It has been a while since I posted about importing a million records daily, and I realize the original approach is out of date and not as efficient as it could be.

Note: This is pre-Quebec, because Quebec adds REST Triggers in Flow Designer (so I may write up a different version for that).

So I am just going to dive into some changes from my original post:

  1. Make the Scripted REST Endpoint as “dumb” as possible. Essentially it will just check that the payload is formatted correctly and spin up a progress worker.
  2. Create a Script Include that does all the work and gets called by the progress worker.
  3. Create temporary data sources and temporary concurrent import sets to handle the volume quickly.

By the end of this, it should take roughly 20 minutes to process a million records (if set up correctly).

Step 1: Making the Scripted Endpoint “Dumb”

(function process( /*RESTAPIRequest*/ request, /*RESTAPIResponse*/ response) {
    var request_body = request.body.data;
    if (request_body && request_body.data_payload && request_body.data_payload.length > 0) {
        var worker = new GlideScriptedHierarchicalWorker();
        var now = new Date().getTime().toString();
        worker.setProgressName('Import Job Matching Data ' + now);
        worker.setBackground(true);
        worker.setScriptIncludeName('global.ScriptIncludeName');
        worker.setScriptIncludeMethod('runAsync');
        worker.putMethodArg('requestBody', request_body);
        worker.start();
        response.setStatus(201);
    } else {
        response.setStatus(400);
    }
})(request, response);

So at the core:

Line 3: Checks the payload to make sure it is formed correctly

Line 4: Creates the background worker

Line 8: Points the worker at the Script Include

Line 9: Specifies the function to run within the Script Include

Line 10: Passes the request body to the Script Include
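
For reference, here is a minimal sketch of a body that would pass the check on line 3. Everything inside data_payload is a placeholder; the field names just need to line up with the columns on your staging table.

// Hypothetical payload: the endpoint only requires a non-empty data_payload array
var exampleBody = {
    data_payload: [
        { u_number: 'JOB0001001', u_short_description: 'First record' },
        { u_number: 'JOB0001002', u_short_description: 'Second record' }
    ]
};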

Step 2 and 3: Concurrent Import Sets

The script magic (it should be self-explanatory, but leave a comment if you need me to add more info):

var ScriptIncludeName = Class.create();
ScriptIncludeName.prototype = {
    initialize: function() {},
    runAsync: function(requestBody) {
        /* Create Temporary Data Source and Concurrent Scheduled Import */
        var data_source = this.newDataSourceJSON('<staging table>', '<data_payload/data_payload>');
        var sh_import = this.newScheduledConcurrentImport(data_source, '<user to run this as sys id>');
        
        /* Save Attachment to Data Source */
        this.writeAttachment(data_source, requestBody);
        
        /* Run the import */
        // Nothing else to do here: the scheduled import created above will fire on its own about a minute after creation
    },
    writeAttachment: function(gr_sysid, request_body) {
        var grDS = new GlideRecord('sys_data_source');
        if (grDS.get(gr_sysid)) {
            var attachment = new GlideSysAttachment();
            var now = new Date().getTime().toString();
            attachment.write(grDS, "data_" + now + ".json", "application/json", JSON.stringify(request_body));
        }

    },
    cleanUpData: function(table_name1, sysid1, table_name2, sysid2) {
        //clean up data source
        var grDS = new GlideRecord(table_name1);
        if (grDS.get(sysid1)) {
            var p_attachment = new GlideSysAttachment();
            var currentAttachments = p_attachment.getAttachments(table_name1, sysid1);
            if (currentAttachments.hasNext()) {
                currentAttachments.deleteMultiple();
            }
            grDS.deleteRecord();
        }
        // clean up scheduled imports
        var grSJ = new GlideRecord(table_name2);
        if (grSJ.get(sysid2)) {
            grSJ.deleteRecord();
        }
    },
    newDataSourceJSON: function(import_set_table_name, root_node) {
        var grDS = new GlideRecord('sys_data_source');
        grDS.initialize();
        var now = new Date().getTime().toString();
        grDS.name = 'Temp Job Matching Data JSON ' + now;
        grDS.import_set_table_name = import_set_table_name;
        grDS.type = 'File';
        grDS.format = 'JSON';
        grDS.jpath_root_node = root_node;
        //grDS.discard_arrays = true;
        grDS.file_retrieval_method = 'Attachment';
        var DS_RECORD_SYSID = grDS.insert();
        return DS_RECORD_SYSID;
    },
    newScheduledConcurrentImport: function(data_source_sys_id, user_sys_id) {
        var grSCI = new GlideRecord('scheduled_import_set');
        var newGUID = gs.generateGUID();
        grSCI.initialize();
        grSCI.setNewGuidValue(newGUID);
        var now = new Date().getTime().toString();
        grSCI.name = 'Temp Scheduled Import ' + now;
        grSCI.data_source = data_source_sys_id;
        grSCI.run_as = user_sys_id;
        grSCI.active = true;
        grSCI.run_type = 'periodically';
        grSCI.run_period = new GlideDuration(36000000); //10 hours
        var run_start = new GlideDateTime();
        var run_start_future = new GlideDateTime(run_start.getValue());
        run_start_future.add(-35940000); // backdate 9 hours 59 minutes so the next run fires about a minute from now
        grSCI.run_start = run_start_future;
        grSCI.concurrent_import = true;
        grSCI.post_script_bool = true;
        var post_script = "new global.ScriptIncludeName().cleanUpData('sys_data_source','" + data_source_sys_id + "','scheduled_import_set','" + newGUID + "')";
        grSCI.post_script = post_script;
        grSCI.insert();
        return newGUID;
    },
    type: 'ScriptIncludeName'
};
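
A quick note on the scheduling trick: the scheduled import gets a 10 hour period but a start time backdated by 9 hours and 59 minutes, so its next run lands about a minute after the record is inserted. Once the concurrent import finishes, the post script calls cleanUpData to delete the temporary data source (and its attachment) and the temporary scheduled import.

If you want to sanity-check the Script Include without calling the REST endpoint, something like this from Scripts - Background should work (the payload fields are placeholders, and it assumes you have swapped in your real staging table and run-as user):

// Hypothetical smoke test for the Script Include
var testBody = {
    data_payload: [
        { u_number: 'JOB0001001', u_short_description: 'First record' }
    ]
};
new global.ScriptIncludeName().runAsync(testBody);
// Then check sys_data_source and scheduled_import_set for the temporary records,
// and watch the concurrent import set queue after the job fires.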

And there you have it! An awesome, re-usable Scripted REST endpoint to import millions of records daily.
