Create a Bulk Extract Job

Extract content from a list of URLs asynchronously.

To create a bulk job, make a POST request to this endpoint.

Payload Setup

Set your Content-Type header to application/x-www-form-urlencoded (not multipart/form-data). The POST body should be in query-string format: key=value pairs joined by &, with multiple URLs in the urls parameter separated by spaces. For example:

name=bulkTest&token=YOURDIFFBOTTOKEN&urls=https://www.diffbot.com https://blog.diffbot.com&apiUrl=https://api.diffbot.com/v3/analyze

Response

Upon adding a new bulk job, you will receive a success message in the JSON response, in addition to full job details:

{
    "response": "Successfully added urls for spidering.",
    "jobs": [
        {
            "jobStatus": {
                "message": "Job is initializing.",
                "status": 0
            },
            "maxHops": -1,
            "downloadJson": "...json",
            "urlProcessPattern": "",
            "jobCompletionTimeUTC": 0,
            "maxRounds": -1,
            "type": "bulk",
            "pageCrawlSuccessesThisRound": 0,
            "urlCrawlRegEx": "",
            "pageProcessPattern": "",
            "apiUrl": "https://api.diffbot.com/v3/analyze",
            "useCanonical": 1,
            "jobCreationTimeUTC": 1649950325,
            "repeat": 0,
            "downloadUrls": "...csv",
            "obeyRobots": 1,
            "roundsCompleted": 0,
            "pageCrawlAttempts": 0,
            "notifyWebhook": "",
            "pageProcessSuccessesThisRound": 0,
            "customHeaders": {},
            "objectsFound": 0,
            "roundStartTime": 0,
            "urlCrawlPattern": "",
            "seedRecrawlFrequency": -1,
            "urlProcessRegEx": "",
            "pageProcessSuccesses": 0,
            "urlsHarvested": 0,
            "crawlDelay": -1,
            "currentTime": 1649950325,
            "useProxies": 0,
            "sentJobDoneNotification": 0,
            "currentTimeUTC": 1649950325,
            "name": "bulkTest",
            "notifyEmail": "",
            "pageCrawlSuccesses": 0,
            "pageProcessAttempts": 0
        }
    ]
}
Language
Click "Try It!" to start a request and see the response here.