Create a Bulk Extract Job

Extract content from a list of URLs asynchronously

To create a bulk job, make a POST request to this endpoint.

Payload Setup

Set your Content-Type header to application/x-www-form-urlencoded (not multipart/form-data). The POST body should be in query-string format (key=value pairs joined by &), for example:

name=bulkTest&token=YOURDIFFBOTTOKEN&urls=https://www.diffbot.com https://blog.diffbot.com&apiUrl=https://api.diffbot.com/v3/analyze

Response

When a new bulk job is added, the JSON response contains a success message along with the full job details:

{
  "response": "Successfully added urls for spidering.",
  "jobs": [
    {
      "jobStatus": {
        "message": "Job is initializing.",
        "status": 0
      },
      "maxHops": -1,
      "downloadJson": "...json",
      "urlProcessPattern": "",
      "jobCompletionTimeUTC": 0,
      "maxRounds": -1,
      "type": "bulk",
      "pageCrawlSuccessesThisRound": 0,
      "urlCrawlRegEx": "",
      "pageProcessPattern": "",
      "apiUrl": "https://api.diffbot.com/v3/analyze",
      "useCanonical": 1,
      "jobCreationTimeUTC": 1649950325,
      "repeat": 0,
      "downloadUrls": "...csv",
      "obeyRobots": 1,
      "roundsCompleted": 0,
      "pageCrawlAttempts": 0,
      "notifyWebhook": "",
      "pageProcessSuccessesThisRound": 0,
      "customHeaders": {},
      "objectsFound": 0,
      "roundStartTime": 0,
      "urlCrawlPattern": "",
      "seedRecrawlFrequency": -1,
      "urlProcessRegEx": "",
      "pageProcessSuccesses": 0,
      "urlsHarvested": 0,
      "crawlDelay": -1,
      "currentTime": 1649950325,
      "useProxies": 0,
      "sentJobDoneNotification": 0,
      "currentTimeUTC": 1649950325,
      "name": "bulkTest",
      "notifyEmail": "",
      "pageCrawlSuccesses": 0,
      "pageProcessAttempts": 0
    }
  ]
}
Language
Click Try It! to start a request and see the response here!