urllib.js 41 KB


  1. 'use strict';
  2. var debug = require('debug')('urllib');
  3. var path = require('path');
  4. var dns = require('dns');
  5. var http = require('http');
  6. var https = require('https');
  7. var urlutil = require('url');
  8. var URL = urlutil.URL;
  9. var util = require('util');
  10. var qs = require('qs');
  11. var ip = require('ip');
  12. var querystring = require('querystring');
  13. var zlib = require('zlib');
  14. var ua = require('default-user-agent');
  15. var digestAuthHeader = require('digest-header');
  16. var ms = require('humanize-ms');
  17. var statuses = require('statuses');
  18. var contentTypeParser = require('content-type');
  19. var first = require('ee-first');
  20. var pump = require('pump');
  21. var utility = require('utility');
  22. var FormStream = require('formstream');
  23. var detectProxyAgent = require('./detect_proxy_agent');
  24. var _Promise;
  25. var _iconv;
  26. var pkg = require('../package.json');
  27. var USER_AGENT = exports.USER_AGENT = ua('node-urllib', pkg.version);
  28. var NODE_MAJOR_VERSION = parseInt(process.versions.node.split('.')[0]);
  29. // change Agent.maxSockets to 1000
  30. exports.agent = new http.Agent();
  31. exports.agent.maxSockets = 1000;
  32. exports.httpsAgent = new https.Agent();
  33. exports.httpsAgent.maxSockets = 1000;
  34. var LONG_STACK_DELIMITER = '\n --------------------\n';
  35. /**
  36. * The default request timeout(in milliseconds).
  37. * @type {Number}
  38. * @const
  39. */
  40. exports.TIMEOUT = ms('5s');
  41. exports.TIMEOUTS = [ms('5s'), ms('5s')];
  42. var REQUEST_ID = 0;
  43. var MAX_VALUE = Math.pow(2, 31) - 10;
  44. var isNode010 = /^v0\.10\.\d+$/.test(process.version);
  45. var isNode012 = /^v0\.12\.\d+$/.test(process.version);
  46. /**
  47. * support data types
  48. * will auto decode response body
  49. * @type {Array}
  50. */
  51. var TEXT_DATA_TYPES = [
  52. 'json',
  53. 'text'
  54. ];
  55. var PROTO_RE = /^https?:\/\//i;
  56. // Keep-Alive: timeout=5, max=100
  57. var KEEP_ALIVE_RE = /^timeout=(\d+)/i;
  58. var SOCKET_REQUEST_COUNT = '_URLLIB_SOCKET_REQUEST_COUNT';
  59. var SOCKET_RESPONSE_COUNT = '_URLLIB_SOCKET_RESPONSE_COUNT';
  60. /**
  61. * Handle all http request, both http and https support well.
  62. *
  63. * @example
  64. *
  65. * ```js
  66. * // GET https://nodejs.org
  67. * urllib.request('https://nodejs.org', function(err, data, res) {});
  68. * // POST https://nodejs.org
  69. * var args = { type: 'post', data: { foo: 'bar' } };
  70. * urllib.request('https://nodejs.org', args, function(err, data, res) {});
  71. * ```
  72. *
  73. * @param {String|Object} url: the request full URL.
  74. * @param {Object} [args]: optional
  75. * - {Object} [data]: request data, will be stringified automatically.
  76. * - {Boolean} [dataAsQueryString]: force convert `data` to query string.
  77. * - {String|Buffer} [content]: optional, if `content` is set, `data` will be ignored.
  78. * - {ReadStream} [stream]: read stream to send.
  79. * - {WriteStream} [writeStream]: writable stream to save response data.
  80. * If you use this, callback's data should be null.
  81. * We will just `pipe(ws, {end: true})`.
  82. * - {Boolean} [consumeWriteStream]: consume the writeStream and invoke the callback after the writeStream closes, default is `true`.
  83. * - {Array<ReadStream|Buffer|String>|Object|ReadStream|Buffer|String} [files]: optional,
  84. * The files will send with `multipart/form-data` format, base on `formstream`.
  85. * If `method` not set, will use `POST` method by default.
  86. * - {String} [method]: optional, could be GET | POST | DELETE | PUT, default is GET
  87. * - {String} [contentType]: optional, request data type, could be `json`, default is undefined
  88. * - {String} [dataType]: optional, response data type, could be `text` or `json`, default is buffer
  89. * - {Boolean|Function} [fixJSONCtlChars]: optional, fix the control characters (U+0000 through U+001F)
  90. * before JSON parse response. Default is `false`.
  91. * `fixJSONCtlChars` can be a function, will pass data to the first argument. e.g.: `data = fixJSONCtlChars(data)`
  92. * - {Object} [headers]: optional, request headers
  93. * - {Boolean} [keepHeaderCase]: optional, by default will convert header keys to lowercase
  94. * - {Number|Array} [timeout]: request timeout(in milliseconds), default is `exports.TIMEOUTS containing connect timeout and response timeout`
  95. * - {Agent} [agent]: optional, http agent. Set `false` if you do not want to use an agent.
  96. * - {Agent} [httpsAgent]: optional, https agent. Set `false` if you do not want to use an agent.
  97. * - {String} [auth]: Basic authentication i.e. 'user:password' to compute an Authorization header.
  98. * - {String} [digestAuth]: Digest authentication i.e. 'user:password' to compute an Authorization header.
  99. * - {String|Buffer|Array} [ca]: An array of strings or Buffers of trusted certificates.
  100. * If this is omitted several well known "root" CAs will be used, like VeriSign.
  101. * These are used to authorize connections.
  102. * Notes: This is necessary only if the server uses the self-signed certificate
  103. * - {Boolean} [rejectUnauthorized]: If true, the server certificate is verified against the list of supplied CAs.
  104. * An 'error' event is emitted if verification fails. Default: true.
  105. * - {String|Buffer} [pfx]: A string or Buffer containing the private key,
  106. * certificate and CA certs of the server in PFX or PKCS12 format.
  107. * - {String|Buffer} [key]: A string or Buffer containing the private key of the client in PEM format.
  108. * Notes: This is necessary only if using the client certificate authentication
  109. * - {String|Buffer} [cert]: A string or Buffer containing the certificate key of the client in PEM format.
  110. * Notes: This is necessary only if using the client certificate authentication
  111. * - {String} [passphrase]: A string of passphrase for the private key or pfx.
  112. * - {String} [ciphers]: A string describing the ciphers to use or exclude.
  113. * - {String} [secureProtocol]: The SSL method to use, e.g. SSLv3_method to force SSL version 3.
  114. * The possible values depend on your installation of OpenSSL and are defined in the constant SSL_METHODS.
  115. * - {Boolean} [followRedirect]: Follow HTTP 3xx responses as redirects. defaults to false.
  116. * - {Number} [maxRedirects]: The maximum number of redirects to follow, defaults to 10.
  117. * - {Function(from, to)} [formatRedirectUrl]: Format the redirect url yourself. Default is `url.resolve(from, to)`
  118. * - {Function(options)} [beforeRequest]: Before request hook, you can change everything here.
  119. * - {Boolean} [streaming]: let you get the res object when request connected, default is `false`. alias `customResponse`
  120. * - {Boolean} [gzip]: Accept gzip response content and auto decode it, default is `false`.
  121. * - {Boolean} [timing]: Enable timing or not, default is `false`.
  122. * - {Function} [lookup]: Custom DNS lookup function, default is `dns.lookup`.
  123. * Require node >= 4.0.0 and only work on `http` protocol.
  124. * - {Boolean} [enableProxy]: optional, enable proxy request. Default is `false`.
  125. * - {String|Object} [proxy]: optional proxy agent uri or options. Default is `null`.
  126. * - {Function} checkAddress: optional, check request address to protect from SSRF and similar attacks.
  127. * @param {Function} [callback]: callback(error, data, res). If missing callback, will return a promise object.
  128. * @return {HttpRequest} req object.
  129. * @api public
  130. */
  131. exports.request = function request(url, args, callback) {
  132. // request(url, callback)
  133. if (arguments.length === 2 && typeof args === 'function') {
  134. callback = args;
  135. args = null;
  136. }
  137. if (typeof callback === 'function') {
  138. return exports.requestWithCallback(url, args, callback);
  139. }
  140. // Promise
  141. if (!_Promise) {
  142. _Promise = require('any-promise');
  143. }
  144. return new _Promise(function (resolve, reject) {
  145. exports.requestWithCallback(url, args, makeCallback(resolve, reject));
  146. });
  147. };
  148. // alias to curl
  149. exports.curl = exports.request;
  /**
   * Build a node-style `(err, data, res)` callback that settles a promise.
   *
   * @param {Function} resolve called with `{ data, status, headers, res }` on success
   * @param {Function} reject called with the error on failure
   * @return {Function} callback(err, data, res)
   */
  function makeCallback(resolve, reject) {
    return function settle(err, data, res) {
      if (!err) {
        var result = {
          data: data,
          status: res.statusCode,
          headers: res.headers,
          res: res
        };
        resolve(result);
        return;
      }
      return reject(err);
    };
  }
  163. // yield urllib.requestThunk(url, args)
  164. exports.requestThunk = function requestThunk(url, args) {
  165. return function (callback) {
  166. exports.requestWithCallback(url, args, function (err, data, res) {
  167. if (err) {
  168. return callback(err);
  169. }
  170. callback(null, {
  171. data: data,
  172. status: res.statusCode,
  173. headers: res.headers,
  174. res: res
  175. });
  176. });
  177. };
  178. };
  179. function requestWithCallback(url, args, callback) {
  180. var req;
  181. // requestWithCallback(url, callback)
  182. if (!url || (typeof url !== 'string' && typeof url !== 'object')) {
  183. var msg = util.format('expect request url to be a string or a http request options, but got %j', url);
  184. throw new Error(msg);
  185. }
  186. if (arguments.length === 2 && typeof args === 'function') {
  187. callback = args;
  188. args = null;
  189. }
  190. args = args || {};
  191. if (REQUEST_ID >= MAX_VALUE) {
  192. REQUEST_ID = 0;
  193. }
  194. var reqId = ++REQUEST_ID;
  195. args.requestUrls = args.requestUrls || [];
  196. args.timeout = args.timeout || exports.TIMEOUTS;
  197. args.maxRedirects = args.maxRedirects || 10;
  198. args.streaming = args.streaming || args.customResponse;
  199. var requestStartTime = Date.now();
  200. var parsedUrl;
  201. if (typeof url === 'string') {
  202. if (!PROTO_RE.test(url)) {
  203. // Support `request('www.server.com')`
  204. url = 'http://' + url;
  205. }
  206. if (URL) {
  207. parsedUrl = urlutil.parse(new URL(url).href);
  208. } else {
  209. parsedUrl = urlutil.parse(url);
  210. }
  211. } else {
  212. parsedUrl = url;
  213. }
  214. var reqMeta = {
  215. requestId: reqId,
  216. url: parsedUrl.href,
  217. args: args,
  218. ctx: args.ctx,
  219. };
  220. if (args.emitter) {
  221. args.emitter.emit('request', reqMeta);
  222. }
  223. var method = (args.type || args.method || parsedUrl.method || 'GET').toUpperCase();
  224. var port = parsedUrl.port || 80;
  225. var httplib = http;
  226. var agent = getAgent(args.agent, exports.agent);
  227. var fixJSONCtlChars = args.fixJSONCtlChars;
  228. if (parsedUrl.protocol === 'https:') {
  229. httplib = https;
  230. agent = getAgent(args.httpsAgent, exports.httpsAgent);
  231. if (!parsedUrl.port) {
  232. port = 443;
  233. }
  234. }
  235. // request through proxy tunnel
  236. var proxyTunnelAgent = detectProxyAgent(parsedUrl, args);
  237. if (proxyTunnelAgent) {
  238. agent = proxyTunnelAgent;
  239. }
  240. var lookup = args.lookup;
  241. // check address to protect from SSRF and similar attacks
  242. if (args.checkAddress) {
  243. var _lookup = lookup || dns.lookup;
  244. lookup = function(host, dnsopts, callback) {
  245. _lookup(host, dnsopts, function emitLookup(err, ip, family) {
  246. // add check address logic in custom dns lookup
  247. if (!err && !args.checkAddress(ip, family)) {
  248. err = new Error('illegal address');
  249. err.name = 'IllegalAddressError';
  250. err.hostname = host;
  251. err.ip = ip;
  252. err.family = family;
  253. }
  254. callback(err, ip, family);
  255. });
  256. };
  257. }
  258. var requestSize = 0;
  259. var options = {
  260. host: parsedUrl.hostname || parsedUrl.host || 'localhost',
  261. path: parsedUrl.path || '/',
  262. method: method,
  263. port: port,
  264. agent: agent,
  265. headers: {},
  266. // default is dns.lookup
  267. // https://github.com/nodejs/node/blob/master/lib/net.js#L986
  268. // custom dnslookup require node >= 4.0.0 (for http), node >=8 (for https)
  269. // https://github.com/nodejs/node/blob/archived-io.js-v0.12/lib/net.js#L952
  270. lookup: lookup,
  271. };
  272. var originHeaderKeys = {};
  273. if (args.headers) {
  274. // only allow enumerable and ownProperty value of args.headers
  275. var names = utility.getOwnEnumerables(args.headers, true);
  276. for (var i = 0; i < names.length; i++) {
  277. var name = names[i];
  278. var key = name.toLowerCase();
  279. if (key !== name) {
  280. originHeaderKeys[key] = name;
  281. }
  282. options.headers[key] = args.headers[name];
  283. }
  284. }
  285. var sslNames = [
  286. 'pfx',
  287. 'key',
  288. 'passphrase',
  289. 'cert',
  290. 'ca',
  291. 'ciphers',
  292. 'rejectUnauthorized',
  293. 'secureProtocol',
  294. 'secureOptions',
  295. ];
  296. for (var i = 0; i < sslNames.length; i++) {
  297. var name = sslNames[i];
  298. if (args.hasOwnProperty(name)) {
  299. options[name] = args[name];
  300. }
  301. }
  302. // fix rejectUnauthorized when major version < 12
  303. if (NODE_MAJOR_VERSION < 12) {
  304. if (options.rejectUnauthorized === false && !options.hasOwnProperty('secureOptions')) {
  305. options.secureOptions = require('constants').SSL_OP_NO_TLSv1_2;
  306. }
  307. }
  308. var auth = args.auth || parsedUrl.auth;
  309. if (auth) {
  310. options.auth = auth;
  311. }
  312. var body = null;
  313. var dataAsQueryString = false;
  314. if (args.files) {
  315. if (!options.method || options.method === 'GET' || options.method === 'HEAD') {
  316. options.method = 'POST';
  317. }
  318. var files = args.files;
  319. var uploadFiles = [];
  320. if (Array.isArray(files)) {
  321. for (var i = 0; i < files.length; i++) {
  322. var field = 'file' + (i === 0 ? '' : i);
  323. uploadFiles.push([ field, files[i] ]);
  324. }
  325. } else {
  326. if (Buffer.isBuffer(files) || typeof files.pipe === 'function' || typeof files === 'string') {
  327. uploadFiles.push([ 'file', files ]);
  328. } else if (typeof files === 'object') {
  329. for (var field in files) {
  330. uploadFiles.push([ field, files[field] ]);
  331. }
  332. }
  333. }
  334. var form = new FormStream();
  335. // set normal fields first
  336. if (args.data) {
  337. for (var fieldName in args.data) {
  338. form.field(fieldName, args.data[fieldName]);
  339. }
  340. }
  341. for (var i = 0; i < uploadFiles.length; i++) {
  342. var item = uploadFiles[i];
  343. if (Buffer.isBuffer(item[1])) {
  344. form.buffer(item[0], item[1], 'bufferfile' + i);
  345. } else if (typeof item[1].pipe === 'function') {
  346. var filename = item[1].path || ('streamfile' + i);
  347. filename = path.basename(filename);
  348. form.stream(item[0], item[1], filename);
  349. } else {
  350. form.file(item[0], item[1]);
  351. }
  352. }
  353. var formHeaders = form.headers();
  354. var formHeaderNames = utility.getOwnEnumerables(formHeaders, true);
  355. for (var i = 0; i < formHeaderNames.length; i++) {
  356. var name = formHeaderNames[i];
  357. options.headers[name.toLowerCase()] = formHeaders[name];
  358. }
  359. debug('set multipart headers: %j, method: %s', formHeaders, options.method);
  360. args.stream = form;
  361. } else {
  362. body = args.content || args.data;
  363. dataAsQueryString = method === 'GET' || method === 'HEAD' || args.dataAsQueryString;
  364. if (!args.content) {
  365. if (body && !(typeof body === 'string' || Buffer.isBuffer(body))) {
  366. if (dataAsQueryString) {
  367. // read: GET, HEAD, use query string
  368. body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body);
  369. } else {
  370. var contentType = options.headers['content-type'];
  371. // auto add application/x-www-form-urlencoded when using urlencode form request
  372. if (!contentType) {
  373. if (args.contentType === 'json') {
  374. contentType = 'application/json';
  375. } else {
  376. contentType = 'application/x-www-form-urlencoded';
  377. }
  378. options.headers['content-type'] = contentType;
  379. }
  380. if (parseContentType(contentType).type === 'application/json') {
  381. body = JSON.stringify(body);
  382. } else {
  383. // 'application/x-www-form-urlencoded'
  384. body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body);
  385. }
  386. }
  387. }
  388. }
  389. }
  390. if (body) {
  391. // if it's a GET or HEAD request, data should be sent as query string
  392. if (dataAsQueryString) {
  393. options.path += (parsedUrl.query ? '&' : '?') + body;
  394. body = null;
  395. }
  396. if (body) {
  397. var length = body.length;
  398. if (!Buffer.isBuffer(body)) {
  399. length = Buffer.byteLength(body);
  400. }
  401. requestSize = length;
  402. options.headers['content-length'] = length.toString();
  403. }
  404. }
  405. if (args.dataType === 'json') {
  406. if (!options.headers.accept) {
  407. options.headers.accept = 'application/json';
  408. }
  409. }
  410. if (typeof args.beforeRequest === 'function') {
  411. // you can use this hook to change every thing.
  412. args.beforeRequest(options);
  413. }
  414. var connectTimer = null;
  415. var responseTimer = null;
  416. var __err = null;
  417. var connected = false; // socket connected or not
  418. var keepAliveSocket = false; // request with keepalive socket
  419. var socketHandledRequests = 0; // socket already handled request count
  420. var socketHandledResponses = 0; // socket already handled response count
  421. var responseSize = 0;
  422. var statusCode = -1;
  423. var statusMessage = null;
  424. var responseAborted = false;
  425. var remoteAddress = '';
  426. var remotePort = '';
  427. var timing = null;
  428. if (args.timing) {
  429. timing = {
  430. // socket assigned
  431. queuing: 0,
  432. // dns lookup time
  433. dnslookup: 0,
  434. // socket connected
  435. connected: 0,
  436. // request sent
  437. requestSent: 0,
  438. // Time to first byte (TTFB)
  439. waiting: 0,
  440. contentDownload: 0,
  441. };
  442. }
  // Stop the pending connect timeout timer, if one is armed.
  function cancelConnectTimer() {
    if (!connectTimer) {
      return;
    }
    clearTimeout(connectTimer);
    connectTimer = null;
    debug('Request#%d connect timer canceled', reqId);
  }
  // Stop the pending response timeout timer, if one is armed.
  function cancelResponseTimer() {
    if (!responseTimer) {
      return;
    }
    clearTimeout(responseTimer);
    responseTimer = null;
    debug('Request#%d response timer canceled', reqId);
  }
  // Complete the current request: stop both timers, collect status/headers from
  // `res`, optionally retry once with a digest `Authorization` header, decorate
  // the error (if any), then invoke the user callback and emit `response`.
  //
  // The user callback is taken and cleared on the first call, so any later call
  // only prints a "callback twice" warning and returns.
  //
  // @param {Error} [err] error to report to the callback, if any
  // @param {*} data response body passed through to the callback
  // @param {Object} [res] response object; statusCode, statusMessage and headers are read from it when given
  457. function done(err, data, res) {
  458. cancelConnectTimer();
  459. cancelResponseTimer();
  // `callback` is already null: done() was called before for this request
  460. if (!callback) {
  461. console.warn('[urllib:warn] [%s] [%s] [worker:%s] %s %s callback twice!!!',
  462. Date(), reqId, process.pid, options.method, url);
  463. // https://github.com/node-modules/urllib/pull/30
  464. if (err) {
  465. console.warn('[urllib:warn] [%s] [%s] [worker:%s] %s: %s\nstack: %s',
  466. Date(), reqId, process.pid, err.name, err.message, err.stack);
  467. }
  468. return;
  469. }
  // take ownership of the callback and clear it, so a second done() is detected above
  470. var cb = callback;
  471. callback = null;
  472. var headers = {};
  473. if (res) {
  474. statusCode = res.statusCode;
  475. statusMessage = res.statusMessage;
  476. headers = res.headers;
  477. }
  478. // handle digest auth
  // only when the server answered 401 with a challenge, no authorization header
  // was sent yet, and the caller supplied `args.digestAuth`
  479. if (statusCode === 401 && headers['www-authenticate']
  480. && !options.headers.authorization && args.digestAuth) {
  481. var authenticate = headers['www-authenticate'];
  482. if (authenticate.indexOf('Digest ') >= 0) {
  483. debug('Request#%d %s: got digest auth header WWW-Authenticate: %s', reqId, url, authenticate);
  484. options.headers.authorization = digestAuthHeader(options.method, options.path, authenticate, args.digestAuth);
  485. debug('Request#%d %s: auth with digest header: %s', reqId, url, options.headers.authorization);
  // forward cookies set by the 401 response to the retried request
  486. if (res.headers['set-cookie']) {
  487. options.headers.cookie = res.headers['set-cookie'].join(';');
  488. }
  // retry the same url with the updated headers and the original callback
  489. args.headers = options.headers;
  490. return exports.requestWithCallback(url, args, cb);
  491. }
  492. }
  493. var requestUseTime = Date.now() - requestStartTime;
  494. if (timing) {
  495. timing.contentDownload = requestUseTime;
  496. }
  497. debug('[%sms] done, %s bytes HTTP %s %s %s %s, keepAliveSocket: %s, timing: %j, socketHandledRequests: %s, socketHandledResponses: %s',
  498. requestUseTime, responseSize, statusCode, options.method, options.host, options.path,
  499. keepAliveSocket, timing, socketHandledRequests, socketHandledResponses);
  // summary object given to the callback (non-streaming mode) and to the `response` event
  500. var response = {
  501. status: statusCode,
  502. statusCode: statusCode,
  503. statusMessage: statusMessage,
  504. headers: headers,
  505. size: responseSize,
  506. aborted: responseAborted,
  507. rt: requestUseTime,
  508. keepAliveSocket: keepAliveSocket,
  509. data: data,
  510. requestUrls: args.requestUrls,
  511. timing: timing,
  512. remoteAddress: remoteAddress,
  513. remotePort: remotePort,
  514. socketHandledRequests: socketHandledRequests,
  515. socketHandledResponses: socketHandledResponses,
  516. };
  // append request context to the error message and attach details as properties
  517. if (err) {
  518. var agentStatus = '';
  519. if (agent && typeof agent.getCurrentStatus === 'function') {
  520. // add current agent status to error message for logging and debug
  521. agentStatus = ', agent status: ' + JSON.stringify(agent.getCurrentStatus());
  522. }
  523. err.message += ', ' + options.method + ' ' + url + ' ' + statusCode
  524. + ' (connected: ' + connected + ', keepalive socket: ' + keepAliveSocket + agentStatus
  525. + ', socketHandledRequests: ' + socketHandledRequests
  526. + ', socketHandledResponses: ' + socketHandledResponses + ')'
  527. + '\nheaders: ' + JSON.stringify(headers);
  528. err.data = data;
  529. err.path = options.path;
  530. err.status = statusCode;
  531. err.headers = headers;
  532. err.res = response;
  // NOTE(review): addLongStackTrace is defined outside this view; presumably extends err.stack — confirm
  533. addLongStackTrace(err, req);
  534. }
  535. // only support agentkeepalive module for now
  536. // agentkeepalive@4: agent.options.freeSocketTimeout
  537. // agentkeepalive@3: agent.freeSocketKeepAliveTimeout
  538. var freeSocketTimeout = agent && (agent.options && agent.options.freeSocketTimeout || agent.freeSocketKeepAliveTimeout);
  539. if (agent && agent.keepAlive && freeSocketTimeout > 0 &&
  540. statusCode >= 200 && headers.connection === 'keep-alive' && headers['keep-alive']) {
  541. // adjust freeSocketTimeout on the socket
  542. var m = KEEP_ALIVE_RE.exec(headers['keep-alive']);
  543. if (m) {
  544. var seconds = parseInt(m[1]);
  545. if (seconds > 0) {
  546. // network delay 500ms
  547. var serverSocketTimeout = seconds * 1000 - 500;
  // only shorten: apply the server's timeout when it is below the agent's own
  548. if (serverSocketTimeout < freeSocketTimeout) {
  549. // https://github.com/node-modules/agentkeepalive/blob/master/lib/agent.js#L127
  550. // agentkeepalive@4
  551. var socket = res.socket || (req && req.socket);
  552. if (agent.options && agent.options.freeSocketTimeout) {
  553. socket.freeSocketTimeout = serverSocketTimeout;
  554. } else {
  555. socket.freeSocketKeepAliveTimeout = serverSocketTimeout;
  556. }
  557. }
  558. }
  559. }
  560. }
  // streaming mode hands the raw `res` to the caller, otherwise the summary object
  561. cb(err, data, args.streaming ? res : response);
  562. if (args.emitter) {
  563. // keep to use the same reqMeta object on request event before
  564. reqMeta.url = parsedUrl.href;
  565. reqMeta.socket = req && req.connection;
  566. reqMeta.options = options;
  567. reqMeta.size = requestSize;
  568. args.emitter.emit('response', {
  569. requestId: reqId,
  570. error: err,
  571. ctx: args.ctx,
  572. req: reqMeta,
  573. res: response,
  574. });
  575. }
  576. }
  // Decide whether `res` is a redirect that should be followed.
  //
  // When `args.followRedirect` is set and the status is a redirect status, a new
  // request to the resolved location is started with the current callback and
  // `{ redirect: true, error: null }` is returned. A missing `location` header or
  // too many redirects yields `{ redirect: false, error: <Error> }`. Any other
  // response yields `{ redirect: false, error: null }`.
  function handleRedirect(res) {
    var shouldFollow = args.followRedirect && statuses.redirect[res.statusCode];
    if (!shouldFollow) {
      return { redirect: false, error: null };
    }

    args._followRedirectCount = (args._followRedirectCount || 0) + 1;
    var location = res.headers.location;
    var failure;

    if (!location) {
      failure = new Error('Got statusCode ' + res.statusCode + ' but cannot resolve next location from headers');
      failure.name = 'FollowRedirectError';
      return { redirect: false, error: failure };
    }

    if (args._followRedirectCount > args.maxRedirects) {
      failure = new Error('Exceeded maxRedirects. Probably stuck in a redirect loop ' + url);
      failure.name = 'MaxRedirectError';
      return { redirect: false, error: failure };
    }

    var nextUrl;
    if (args.formatRedirectUrl) {
      nextUrl = args.formatRedirectUrl(url, location);
    } else {
      nextUrl = urlutil.resolve(url, location);
    }
    debug('Request#%d %s: `redirected` from %s to %s', reqId, options.path, url, nextUrl);

    // the current response is finished with, its timer must not fire any more
    cancelResponseTimer();

    // an absolute `location: http://other-domain/url` invalidates a custom host header
    var isAbsoluteLocation = PROTO_RE.test(location);
    if (options.headers.host && isAbsoluteLocation) {
      options.headers.host = null;
      args.headers = options.headers;
    }

    // move the callback over to the follow-up request so done() cannot fire it here
    var pendingCallback = callback;
    callback = null;
    exports.requestWithCallback(nextUrl, args, pendingCallback);

    return { redirect: true, error: null };
  }
  613. // don't set user-agent
  614. if (args.headers && (args.headers['User-Agent'] === null || args.headers['user-agent'] === null)) {
  615. if (options.headers['user-agent']) {
  616. delete options.headers['user-agent'];
  617. }
  618. } else {
  619. // need to set user-agent
  620. var hasAgentHeader = options.headers['user-agent'];
  621. if (!hasAgentHeader) {
  622. options.headers['user-agent'] = USER_AGENT;
  623. }
  624. }
  625. if (args.gzip) {
  626. var isAcceptEncodingNull = (args.headers && (args.headers['Accept-Encoding'] === null || args.headers['accept-encoding'] === null));
  627. if (!isAcceptEncodingNull) {
  628. var hasAcceptEncodingHeader = options.headers['accept-encoding'];
  629. if (!hasAcceptEncodingHeader) {
  630. options.headers['accept-encoding'] = 'gzip, deflate';
  631. }
  632. }
  633. }
  // Decompress `body` according to the response `content-encoding` header.
  //
  // `cb(err, data, encoding)`: an empty body, a missing header or an encoding
  // other than gzip/deflate passes the body through untouched together with the
  // encoding; gzip/deflate bodies are unzipped and `cb(err, data)` is called
  // without an encoding argument.
  function decodeContent(res, body, cb) {
    var contentEncoding = res.headers['content-encoding'];
    if (body.length === 0 || !contentEncoding) {
      return cb(null, body, contentEncoding);
    }

    contentEncoding = contentEncoding.toLowerCase();
    var isZipped = contentEncoding === 'gzip' || contentEncoding === 'deflate';
    if (!isZipped) {
      cb(null, body, contentEncoding);
      return;
    }

    debug('unzip %d length body', body.length);
    zlib.unzip(body, function onUnzip(unzipError, unzipped) {
      // give plain zlib errors a recognizable name
      if (unzipError && unzipError.name === 'Error') {
        unzipError.name = 'UnzipError';
      }
      cb(unzipError, unzipped);
    });
  }
  655. var writeStream = args.writeStream;
  656. var isWriteStreamClose = false;
  657. debug('Request#%d %s %s with headers %j, options.path: %s',
  658. reqId, method, url, options.headers, options.path);
  659. args.requestUrls.push(parsedUrl.href);
  // Handle the incoming response `res` in one of three modes:
  //   1. `args.streaming`: hand `res` to done() right away (after redirect handling).
  //   2. `writeStream`: pipe the response into the writable stream and finish
  //      when that stream is done (or on `res` end if consumeWriteStream is false).
  //   3. default: buffer all chunks, then decode/parse them on `end` or `close`.
  //
  // @param {Object} res the response object of the current request
  660. function onResponse(res) {
  // count responses per socket; the counter is stored on the socket itself
  661. socketHandledResponses = res.socket[SOCKET_RESPONSE_COUNT] = (res.socket[SOCKET_RESPONSE_COUNT] || 0) + 1;
  662. if (timing) {
  663. timing.waiting = Date.now() - requestStartTime;
  664. }
  665. debug('Request#%d %s `req response` event emit: status %d, headers: %j',
  666. reqId, url, res.statusCode, res.headers);
  // streaming mode: the caller consumes `res` itself, so no body is collected here
  667. if (args.streaming) {
  668. var result = handleRedirect(res);
  669. if (result.redirect) {
  670. res.resume();
  671. return;
  672. }
  673. if (result.error) {
  674. res.resume();
  675. return done(result.error, null, res);
  676. }
  677. return done(null, null, res);
  678. }
  // the `error` handler only logs; done() is not called from here
  679. res.on('error', function () {
  680. debug('Request#%d %s: `res error` event emit, total size %d, socket handled %s requests and %s responses',
  681. reqId, url, responseSize, socketHandledRequests, socketHandledResponses);
  682. });
  683. res.on('aborted', function () {
  684. responseAborted = true;
  685. debug('Request#%d %s: `res aborted` event emit, total size %d',
  686. reqId, url, responseSize);
  687. });
  688. if (writeStream) {
  689. // If there's a writable stream to receive the response data, just pipe the
  690. // response stream to that writable stream and call the callback when it has
  691. // finished writing.
  692. //
  693. // NOTE that when the response stream `res` emits an 'end' event it just
  694. // means that it has finished piping data to another stream. In the
  695. // meanwhile that writable stream may still be writing data to the disk until
  696. // it emits a 'close' event.
  697. //
  698. // That means that we should not apply callback until the 'close' of the
  699. // writable stream is emitted.
  700. //
  701. // See also:
  702. // - https://github.com/TBEDP/urllib/commit/959ac3365821e0e028c231a5e8efca6af410eabb
  703. // - http://nodejs.org/api/stream.html#stream_event_end
  704. // - http://nodejs.org/api/stream.html#stream_event_close_1
  705. var result = handleRedirect(res);
  706. if (result.redirect) {
  707. res.resume();
  708. return;
  709. }
  710. if (result.error) {
  711. res.resume();
  712. // end the stream first
  713. writeStream.end();
  714. done(result.error, null, res);
  715. return;
  716. }
  717. // you can set consumeWriteStream false that only wait response end
  718. if (args.consumeWriteStream === false) {
  719. res.on('end', done.bind(null, null, null, res));
  // the pump callback only logs here; done() is triggered by the `end` listener above
  720. pump(res, writeStream, function(err) {
  721. if (isWriteStreamClose) {
  722. return;
  723. }
  724. isWriteStreamClose = true;
  725. debug('Request#%d %s: writeStream close, error: %s', reqId, url, err);
  726. });
  727. return;
  728. }
  729. // node 0.10, 0.12: only emit res aborted, writeStream close not fired
  730. if (isNode010 || isNode012) {
  731. first([
  732. [ writeStream, 'close' ],
  733. [ res, 'aborted' ],
  734. ], function(_, stream, event) {
  735. debug('Request#%d %s: writeStream or res %s event emitted', reqId, url, event);
  736. done(__err || null, null, res);
  737. });
  738. res.pipe(writeStream);
  739. return;
  740. }
  741. debug('Request#%d %s: pump res to writeStream', reqId, url);
  742. pump(res, writeStream, function(err) {
  743. debug('Request#%d %s: writeStream close event emitted, error: %s, isWriteStreamClose: %s',
  744. reqId, url, err, isWriteStreamClose);
  // guard so done() is called at most once from this callback
  745. if (isWriteStreamClose) {
  746. return;
  747. }
  748. isWriteStreamClose = true;
  // an error recorded earlier in `__err` takes precedence over the pump error
  749. done(__err || err, null, res);
  750. });
  751. return;
  752. }
  753. // Otherwise, just concat those buffers.
  754. //
  755. // NOTE that the `chunk` is not a String but a Buffer. It means that if
  756. // you simply concat two chunk with `+` you're actually converting both
  757. // Buffers into Strings before concatenating them. It'll cause problems when
  758. // dealing with multi-byte characters.
  759. //
  760. // The solution is to store each chunk in an array and concat them with
  761. // 'buffer-concat' when all chunks are received.
  762. //
  763. // See also:
  764. // http://cnodejs.org/topic/4faf65852e8fb5bc65113403
  765. var chunks = [];
  766. res.on('data', function (chunk) {
  767. debug('Request#%d %s: `res data` event emit, size %d', reqId, url, chunk.length);
  768. responseSize += chunk.length;
  769. chunks.push(chunk);
  770. });
  // both `close` and `end` call the handler below; this flag lets only the first one run
  771. var isEmitted = false;
  772. function handleResponseCloseAndEnd(event) {
  773. debug('Request#%d %s: `res %s` event emit, total size %d, socket handled %s requests and %s responses',
  774. reqId, url, event, responseSize, socketHandledRequests, socketHandledResponses);
  775. if (isEmitted) {
  776. return;
  777. }
  778. isEmitted = true;
  779. var body = Buffer.concat(chunks, responseSize);
  780. debug('Request#%d %s: _dumped: %s',
  781. reqId, url, res._dumped);
  782. if (__err) {
  783. // req.abort() after `res data` event emit.
  784. return done(__err, body, res);
  785. }
  786. var result = handleRedirect(res);
  787. if (result.error) {
  788. return done(result.error, body, res);
  789. }
  790. if (result.redirect) {
  791. return;
  792. }
  793. decodeContent(res, body, function (err, data, encoding) {
  794. if (err) {
  // decoding failed: report the error together with the raw, undecoded body
  795. return done(err, body, res);
  796. }
  797. // if body not decode, dont touch it
  798. if (!encoding && TEXT_DATA_TYPES.indexOf(args.dataType) >= 0) {
  799. // try to decode charset
  800. try {
  801. data = decodeBodyByCharset(data, res);
  802. } catch (e) {
  803. debug('decodeBodyByCharset error: %s', e);
  804. // if error, dont touch it
  805. return done(null, data, res);
  806. }
  807. if (args.dataType === 'json') {
  // an empty response body yields `null` instead of a JSON parse attempt
  808. if (responseSize === 0) {
  809. data = null;
  810. } else {
  811. var r = parseJSON(data, fixJSONCtlChars);
  812. if (r.error) {
  813. err = r.error;
  814. } else {
  815. data = r.data;
  816. }
  817. }
  818. }
  819. }
  // an aborted response is only logged; no error is raised for it
  820. if (responseAborted) {
  821. // err = new Error('Remote socket was terminated before `response.end()` was called');
  822. // err.name = 'RemoteSocketClosedError';
  823. debug('Request#%d %s: Remote socket was terminated before `response.end()` was called', reqId, url);
  824. }
  825. done(err, data, res);
  826. });
  827. }
  828. // node >= 14 only emit close if req abort
  829. res.on('close', function () {
  830. handleResponseCloseAndEnd('close');
  831. });
  832. res.on('end', function () {
  833. handleResponseCloseAndEnd('end');
  834. });
  835. }
  836. var connectTimeout, responseTimeout;
  837. if (Array.isArray(args.timeout)) {
  838. connectTimeout = ms(args.timeout[0]);
  839. responseTimeout = ms(args.timeout[1]);
  840. } else { // set both timeout equal
  841. connectTimeout = responseTimeout = ms(args.timeout);
  842. }
  843. debug('ConnectTimeout: %d, ResponseTimeout: %d', connectTimeout, responseTimeout);
  /**
   * Arm the connect timer. If it fires, the request is aborted with a
   * `ConnectionTimeoutError`, or with a `SocketAssignTimeoutError` when the
   * request has no socket at that moment.
   */
  function startConnectTimer() {
    debug('Connect timer ticking, timeout: %d', connectTimeout);

    function onConnectTimeout() {
      connectTimer = null;
      // only overwrite the status code while it still holds -1
      if (statusCode === -1) {
        statusCode = -2;
      }

      var socketAssigned = Boolean(req.socket);
      var text = 'Connect timeout for ' + connectTimeout + 'ms';
      if (!socketAssigned) {
        text += ', working sockets is full';
      }

      var timeoutError = new Error(text);
      timeoutError.name = socketAssigned ? 'ConnectionTimeoutError' : 'SocketAssignTimeoutError';
      timeoutError.requestId = reqId;
      // abortRequest() and the error handlers read the shared __err
      __err = timeoutError;

      debug('ConnectTimeout: Request#%d %s %s: %s, connected: %s', reqId, url, timeoutError.name, text, connected);
      abortRequest();
    }

    connectTimer = setTimeout(onConnectTimeout, connectTimeout);
  }
  /**
   * Arm the response timer. If it fires, the request is aborted with a
   * `ResponseTimeoutError`.
   *
   * The misspelled name is kept as-is because the socket handlers call it by
   * this name.
   */
  function startResposneTimer() {
    debug('Response timer ticking, timeout: %d', responseTimeout);

    function onResponseTimeout() {
      responseTimer = null;

      var timeoutError = new Error('Response timeout for ' + responseTimeout + 'ms');
      timeoutError.name = 'ResponseTimeoutError';
      timeoutError.requestId = reqId;
      // abortRequest() and the error handlers read the shared __err
      __err = timeoutError;

      debug('ResponseTimeout: Request#%d %s %s: %s, connected: %s',
        reqId, url, timeoutError.name, timeoutError.message, connected);
      abortRequest();
    }

    responseTimer = setTimeout(onResponseTimeout, responseTimeout);
  }
  877. if (args.checkAddress) {
  878. var hostname = parsedUrl.hostname;
  879. // if the request hostname is already an IP address, the custom lookup won't execute
  880. var family = null;
  881. if (ip.isV4Format(hostname)) {
  882. family = 4;
  883. } else if (ip.isV6Format(hostname)) {
  884. family = 6;
  885. }
  886. if (family) {
  887. if (!args.checkAddress(hostname, family)) {
  888. var err = new Error('illegal address');
  889. err.name = 'IllegalAddressError';
  890. err.hostname = hostname;
  891. err.ip = hostname;
  892. err.family = family;
  893. return done(err);
  894. }
  895. }
  896. }
  897. // request headers checker will throw error
  898. try {
  899. var finalOptions = options;
  900. // restore origin header key
  901. if (args.keepHeaderCase) {
  902. var originKeys = Object.keys(originHeaderKeys);
  903. if (originKeys.length) {
  904. var finalHeaders = {};
  905. var names = utility.getOwnEnumerables(options.headers, true);
  906. for (var i = 0; i < names.length; i++) {
  907. var name = names[i];
  908. finalHeaders[originHeaderKeys[name] || name] = options.headers[name];
  909. }
  910. finalOptions = Object.assign({}, options);
  911. finalOptions.headers = finalHeaders;
  912. }
  913. }
  914. req = httplib.request(finalOptions, onResponse);
  915. if (args.trace) {
  916. req._callSite = {};
  917. Error.captureStackTrace(req._callSite, requestWithCallback);
  918. }
  919. } catch (err) {
  920. return done(err);
  921. }
  922. // environment detection: browser or nodejs
  923. if (typeof(window) === 'undefined') {
  924. // start connect timer just after `request` return, and just in nodejs environment
  925. startConnectTimer();
  926. }
  927. var isRequestAborted = false;
  /**
   * Abort the in-flight request; calls after the first one are no-ops.
   * Expects `__err` to have been set by the caller beforehand.
   */
  function abortRequest() {
    if (!isRequestAborted) {
      isRequestAborted = true;
      debug('Request#%d %s abort, connected: %s', reqId, url, connected);

      // A request that has not been assigned a socket yet will not emit an
      // `error` event on abort, so report the failure here.
      var socketMissing = !req.socket;
      if (socketMissing) {
        __err.noSocket = true;
        done(__err);
      }

      req.abort();
    }
  }
  941. if (timing) {
  942. // request sent
  943. req.on('finish', function() {
  944. timing.requestSent = Date.now() - requestStartTime;
  945. });
  946. }
  947. req.once('socket', function (socket) {
  948. if (timing) {
  949. // socket queuing time
  950. timing.queuing = Date.now() - requestStartTime;
  951. }
  952. // https://github.com/nodejs/node/blob/master/lib/net.js#L377
  953. // https://github.com/nodejs/node/blob/v0.10.40-release/lib/net.js#L352
  954. // should use socket.socket on 0.10.x
  955. if (isNode010 && socket.socket) {
  956. socket = socket.socket;
  957. }
  958. var orginalSocketTimeout = getSocketTimeout(socket);
  959. if (orginalSocketTimeout && orginalSocketTimeout < responseTimeout) {
  960. // make sure socket live longer than the response timer
  961. var socketTimeout = responseTimeout + 500;
  962. debug('Request#%d socket.timeout(%s) < responseTimeout(%s), reset socket timeout to %s',
  963. reqId, orginalSocketTimeout, responseTimeout, socketTimeout);
  964. socket.setTimeout(socketTimeout);
  965. }
  966. socketHandledRequests = socket[SOCKET_REQUEST_COUNT] = (socket[SOCKET_REQUEST_COUNT] || 0) + 1;
  967. if (socket[SOCKET_RESPONSE_COUNT]) {
  968. socketHandledResponses = socket[SOCKET_RESPONSE_COUNT];
  969. }
  970. var readyState = socket.readyState;
  971. if (readyState === 'opening') {
  972. socket.once('lookup', function(err, ip, addressType) {
  973. debug('Request#%d %s lookup: %s, %s, %s', reqId, url, err, ip, addressType);
  974. if (timing) {
  975. timing.dnslookup = Date.now() - requestStartTime;
  976. }
  977. if (ip) {
  978. remoteAddress = ip;
  979. }
  980. });
  981. socket.once('connect', function() {
  982. if (timing) {
  983. // socket connected
  984. timing.connected = Date.now() - requestStartTime;
  985. }
  986. // cancel socket timer at first and start tick for TTFB
  987. cancelConnectTimer();
  988. startResposneTimer();
  989. debug('Request#%d %s new socket connected', reqId, url);
  990. connected = true;
  991. if (!remoteAddress) {
  992. remoteAddress = socket.remoteAddress;
  993. }
  994. remotePort = socket.remotePort;
  995. });
  996. return;
  997. }
  998. debug('Request#%d %s reuse socket connected, readyState: %s', reqId, url, readyState);
  999. connected = true;
  1000. keepAliveSocket = true;
  1001. if (!remoteAddress) {
  1002. remoteAddress = socket.remoteAddress;
  1003. }
  1004. remotePort = socket.remotePort;
  1005. // reuse socket, timer should be canceled.
  1006. cancelConnectTimer();
  1007. startResposneTimer();
  1008. });
  1009. if (writeStream) {
  1010. writeStream.once('error', function(err) {
  1011. err.message += ' (writeStream "error")';
  1012. __err = err;
  1013. debug('Request#%d %s `writeStream error` event emit, %s: %s', reqId, url, err.name, err.message);
  1014. abortRequest();
  1015. });
  1016. }
  1017. var isRequestError = false;
  /**
   * Report a request error through `done`; only the first non-empty error is
   * handled. A generic `Error` is renamed to `ResponseError` or `RequestError`
   * depending on whether the socket was already connected, and a previously
   * recorded `__err` takes precedence over `err` when calling `done`.
   *
   * @param {Error} [err] - error emitted by the request or by `pump`
   */
  function handleRequestError(err) {
    if (!err || isRequestError) {
      return;
    }
    isRequestError = true;

    if (err.name === 'Error') {
      if (connected) {
        err.name = 'ResponseError';
      } else {
        err.name = 'RequestError';
      }
    }

    debug('Request#%d %s `req error` event emit, %s: %s', reqId, url, err.name, err.message);
    done(__err || err);
  }
  1029. if (args.stream) {
  1030. debug('Request#%d pump args.stream to req', reqId);
  1031. pump(args.stream, req, handleRequestError);
  1032. } else {
  1033. req.end(body);
  1034. }
  1035. // when stream already consumed, req's `finish` event is emitted and pump will ignore error after pipe finished
  1036. // but if server response timeout later, we will abort the request and emit an error in req
  1037. // so we must always manually listen to req's `error` event here to ensure this error is handled
  1038. req.on('error', handleRequestError);
  1039. req.requestId = reqId;
  1040. return req;
  1041. }
  // Expose the callback-style request function on the module's exports.
  1042. exports.requestWithCallback = requestWithCallback;
  // Short escape sequences for the characters that have one in JSON.
  var JSONCtlCharsMap = {
    '"': '\\"',   // \u0022
    '\\': '\\\\', // \u005c
    '\b': '\\b',  // \u0008
    '\f': '\\f',  // \u000c
    '\n': '\\n',  // \u000a
    '\r': '\\r',  // \u000d
    '\t': '\\t'   // \u0009
  };

  // Matches every control character (U+0000 through U+001F) and the backslash.
  var JSONCtlCharsRE = /[\u0000-\u001F\u005C]/g;

  /**
   * Escape a single character: use its short escape when the map has one,
   * otherwise fall back to a four-digit `\uXXXX` escape.
   *
   * @param {String} c - one character
   * @return {String} the escaped form
   */
  function _replaceOneChar(c) {
    var shortEscape = JSONCtlCharsMap[c];
    if (shortEscape) {
      return shortEscape;
    }
    // adding 0x10000 and dropping the leading "1" zero-pads the hex code to 4 digits
    var paddedHex = (c.charCodeAt(0) + 0x10000).toString(16);
    return '\\u' + paddedHex.slice(1);
  }

  /**
   * Escape every control character and backslash found in `str`.
   *
   * @param {String} str
   * @return {String}
   */
  function replaceJSONCtlChars(str) {
    var escaped = str.replace(JSONCtlCharsRE, _replaceOneChar);
    return escaped;
  }
  /**
   * Parse a response body as JSON without throwing.
   *
   * @param {String} data - body text to parse
   * @param {Boolean|Function} [fixJSONCtlChars] - when a function, it is applied
   *   to `data` before parsing; any other truthy value escapes the control
   *   characters with `replaceJSONCtlChars`
   * @return {Object} `{ error, data }`: `error` is null on success and `data`
   *   is null on failure
   */
  function parseJSON(data, fixJSONCtlChars) {
    var result = {
      error: null,
      data: null
    };
    if (fixJSONCtlChars) {
      if (typeof fixJSONCtlChars === 'function') {
        data = fixJSONCtlChars(data);
      } else {
        // https://github.com/node-modules/urllib/pull/77
        // escape the control characters (U+0000 through U+001F)
        data = replaceJSONCtlChars(data);
      }
    }
    try {
      result.data = JSON.parse(data);
    } catch (err) {
      if (err.name === 'SyntaxError') {
        err.name = 'JSONResponseFormatError';
      }
      // Build the preview from a string: a non-string `data` (e.g. a custom
      // fixer that returned undefined) must not make this handler throw.
      var text = typeof data === 'string' ? data : String(data);
      if (text.length > 1024) {
        // show 0~512 ... -512~end data
        err.message += ' (data json format: ' +
          JSON.stringify(text.slice(0, 512)) + ' ...skip... ' + JSON.stringify(text.slice(text.length - 512)) + ')';
      } else {
        err.message += ' (data json format: ' + JSON.stringify(text) + ')';
      }
      result.error = err;
    }
    return result;
  }
  /**
   * Convert a response body to a string using the charset named in the
   * response's `content-type` header.
   *
   * Without a `content-type` header the body is stringified with its default
   * `toString()`. Without a charset parameter `utf-8` is used. Charsets that
   * `Buffer` does not support are decoded with the lazily required `iconv-lite`.
   *
   * @param {Buffer} data
   * @param {Http(s)Response} res
   * @return {String}
   */
  function decodeBodyByCharset(data, res) {
    var contentType = res.headers['content-type'];
    if (!contentType) {
      return data.toString();
    }

    var parsed = parseContentType(contentType);
    var charset = parsed.parameters.charset || 'utf-8';
    if (Buffer.isEncoding(charset)) {
      return data.toString(charset);
    }

    // load iconv-lite only when a non-native charset shows up
    if (!_iconv) {
      _iconv = require('iconv-lite');
    }
    return _iconv.decode(data, charset);
  }
  /**
   * Pick the agent to use for a request.
   *
   * Only `undefined` falls back to `defaultAgent`; every other value,
   * including `false` and `null`, is returned unchanged.
   *
   * @param {*} agent - agent supplied by the caller
   * @param {*} defaultAgent - fallback used when `agent` is `undefined`
   * @return {*}
   */
  function getAgent(agent, defaultAgent) {
    if (agent === undefined) {
      return defaultAgent;
    }
    return agent;
  }
  /**
   * Parse a `content-type` header value.
   *
   * @param {String} str - raw header value
   * @return {Object} whatever the parser returns, or `{ parameters: {} }` when
   *   parsing throws
   */
  function parseContentType(str) {
    var parsed;
    try {
      parsed = contentTypeParser.parse(str);
    } catch (_) {
      // ignore content-type errors and treat the header as having no parameters
      parsed = { parameters: {} };
    }
    return parsed;
  }
  /**
   * Append the call-site stack recorded on `req._callSite` to `err.stack`.
   *
   * Nothing happens when `req` is missing, when it carries no non-empty string
   * stack, when `err` was already extended, or when the recorded stack has no
   * line break. The first line of the recorded stack is dropped.
   *
   * @param {Error} err - error to extend in place
   * @param {Object} [req] - request that may carry `_callSite.stack`
   */
  function addLongStackTrace(err, req) {
    var recorded = req && req._callSite && req._callSite.stack;
    if (typeof recorded !== 'string' || recorded === '' || err._longStack) {
      return;
    }

    var firstBreak = recorded.indexOf('\n');
    if (firstBreak === -1) {
      return;
    }

    err._longStack = true;
    err.stack += LONG_STACK_DELIMITER + recorded.slice(firstBreak + 1);
  }
  // Node 8 sockets do not have a `timeout` attribute, so fall back to `_idleTimeout`.
  // https://github.com/nodejs/node/pull/21204/files#diff-e6ef024c3775d787c38487a6309e491dR408
  function getSocketTimeout(socket) {
    var publicTimeout = socket.timeout;
    if (publicTimeout) {
      return publicTimeout;
    }
    return socket._idleTimeout;
  }