POST https://api.diffbot.com/v3/bulk
Extract a list of URLs asynchronously
To create a bulk job, make a POST request to this endpoint.
Payload Setup
Set your Content-Type header to application/x-www-form-urlencoded (not multipart/form-data). Your POST body content should be in query string format (key/value pairs), for example:
name=bulkTest&token=YOURDIFFBOTTOKEN&urls=https://www.diffbot.com https://blog.diffbot.com&apiUrl=https://api.diffbot.com/v3/analyze
Response
Upon adding a new bulk job, you will receive a success message in the JSON response, in addition to full job details:
{
"response": "Successfully added urls for spidering.",
"jobs": [
{
"jobStatus": {
"message": "Job is initializing.",
"status": 0
},
"maxHops": -1,
"downloadJson": "...json",
"urlProcessPattern": "",
"jobCompletionTimeUTC": 0,
"maxRounds": -1,
"type": "bulk",
"pageCrawlSuccessesThisRound": 0,
"urlCrawlRegEx": "",
"pageProcessPattern": "",
"apiUrl": "https://api.diffbot.com/v3/analyze",
"useCanonical": 1,
"jobCreationTimeUTC": 1649950325,
"repeat": 0,
"downloadUrls": "...csv",
"obeyRobots": 1,
"roundsCompleted": 0,
"pageCrawlAttempts": 0,
"notifyWebhook": "",
"pageProcessSuccessesThisRound": 0,
"customHeaders": {},
"objectsFound": 0,
"roundStartTime": 0,
"urlCrawlPattern": "",
"seedRecrawlFrequency": -1,
"urlProcessRegEx": "",
"pageProcessSuccesses": 0,
"urlsHarvested": 0,
"crawlDelay": -1,
"currentTime": 1649950325,
"useProxies": 0,
"sentJobDoneNotification": 0,
"currentTimeUTC": 1649950325,
"name": "bulkTest",
"notifyEmail": "",
"pageCrawlSuccesses": 0,
"pageProcessAttempts": 0
}
]
}