Node源码解析——http(下)
关于http的最后一篇文章,看数据容器outgoing和incoming的内部实现,以及之前遗留的问题。
OutgoingMessage
全局搜一下OutgoingMessage,可以发现除了一些测试文件外,用到的地方只有两个——ClientRequest和ServerResponse,分别用到的时机在前面的文章都谈到了,ClientRequest用于客户端发出请求构造的req,ServerResponse用于服务端响应请求构造的res。那么对应的IncomingMessage什么时候用到也就很清楚了,即ClientRequest收到的res和ServerResponse接收的req。
简单概括,outgoing和incoming双端都会用到,不过是out还是in是相对于自身来说的。比如client发起req,那么req本身就是out,在client这边会用outgoingMessage,对于server,client的req则是in,在server这边又是incomingMessage了。
如果还有印象的话,在http上这篇文章中就介绍过,serverResponse就是在parserOnIncoming这个函数中根据req构造outgoingMessage的。
const res = new server[kServerResponse](req);
那么现在就进入_http_outgoing.js,仔细看看outgoing究竟做了什么。
// Constructor for an outgoing HTTP message. It runs the Stream constructor on
// `this` and then sets every piece of per-message state to its initial value;
// no socket is attached and nothing is written at this point.
function OutgoingMessage() {
Stream.call(this);
// Queue that holds all currently pending data, until the response will be
// assigned to the socket (until it gets its turn in the HTTP pipeline).
this.outputData = [];
// `outputSize` is an approximate measure of how much data is queued on this
// response. `_onPendingData` will be invoked to update similar global
// per-connection counter. That counter will be used to pause/unpause the
// TCP socket and HTTP Parser and thus handle the backpressure.
this.outputSize = 0;
this.writable = true;
// Connection / framing flags, all starting from their defaults.
this._last = false;
this.chunkedEncoding = false;
this.shouldKeepAlive = true;
this.useChunkedEncodingByDefault = true;
this.sendDate = false;
// Flags recording that a header was explicitly removed.
this._removedConnection = false;
this._removedContLen = false;
this._removedTE = false;
this._contentLength = null;
this._hasBody = true;
this._trailer = '';
// Symbol-keyed internal state: pending 'drain' flag (see below for kCorked).
this[kNeedDrain] = false;
this.finished = false;
this._headerSent = false;
this[kCorked] = 0;
// No socket, no serialized header and no stored header map yet.
this.socket = null;
this._header = null;
this[kOutHeaders] = null;
// Pending-data hook starts as the no-op implementation.
this._onPendingData = noopPendingOutput;
}
// Instances inherit from Stream.prototype, and the constructor function itself
// inherits from Stream.
ObjectSetPrototypeOf(OutgoingMessage.prototype, Stream.prototype);
ObjectSetPrototypeOf(OutgoingMessage, Stream);
首先肯定知道,我们平时处理req,res都用到了stream,那么自然是要继承stream。构造函数中也就主要做了这件事,并初始化了一系列属性。需要注意有一个outputData,注释也说明了,用来队列化待处理的数据,直到res被分配socket,就相当于数据缓冲区。那么res什么时候被分配socket呢?直觉上肯定是有空闲socket来处理这些数据了。
回到parserOnIncoming函数找到assignSocket。
if (socket._httpMessage) {
// There are already pending outgoing res, append.
state.outgoing.push(res);
} else {
res.assignSocket(socket);
}
可以看到,当socket被占用时,新进的res会在一个名为outgoing的队列中等待。换句话说,socket空闲时就会走assignSocket。在ServerResponse中找到其实现。
// Binds this response to `socket`. Asserts that the socket has no message
// attached yet, links the two objects in both directions, registers a 'close'
// listener on the socket, emits 'socket' on the response and finally calls
// `_flush()`.
ServerResponse.prototype.assignSocket = function assignSocket(socket) {
// A socket may only carry one message at a time.
assert(!socket._httpMessage);
socket._httpMessage = this;
socket.on('close', onServerResponseClose);
this.socket = socket;
this.emit('socket', socket);
// Called last, after the socket link and the 'socket' event.
this._flush();
};
这里的_flush会把outputData中仍未被消费的data刷到socket中。
// Used on both the server and the client side: tries to push whatever this
// message still has queued out to its socket.
OutgoingMessage.prototype._flush = function _flush() {
  const sock = this.socket;

  // Without a socket that can still be written to there is nothing to do.
  if (!sock || !sock.writable) {
    return;
  }

  // Queued output may remain; hand it to the socket first.
  const flushed = this._flushOutput(sock);

  if (this.finished) {
    // The message has already ended, so complete it via _finish().
    this._finish();
    return;
  }

  // The queue was flushed while a writer was waiting: signal 'drain'.
  if (flushed && this[kNeedDrain]) {
    this[kNeedDrain] = false;
    this.emit('drain');
  }
};
为什么要维护outgoing队列,注释中也有一个案例解释,这里直接粘贴过来。
// In both HTTP servers and clients it is possible to queue up several
// outgoing messages. This is easiest to imagine in the case of a client.
// Take the following situation:
//
// req1 = client.request('GET', '/');
// req2 = client.request('POST', '/');
//
// When the user does
//
// req2.write('hello world\n');
//
// it's possible that the first request has not been completely flushed to
// the socket yet. Thus the outgoing messages need to be prepared to queue
// up data internally before sending it on further to the socket's queue.
//
再看parserOnIncoming中,同时还监听了res的finish事件。
// Cleanup for a finished server response (presumably registered as the
// response's 'finish' handler; the registration is not part of this excerpt).
// It removes the request from `state.incoming`, detaches the socket from
// `res`, emits 'close' on `req`, schedules `emitCloseNT` for `res`, and then
// decides what happens to the socket.
function resOnFinish(req, res, socket, state, server) {
// Usually the first incoming element should be our request. it may
// be that in the case abortIncoming() was called that the incoming
// array will be empty.
assert(state.incoming.length === 0 || state.incoming[0] === req);
state.incoming.shift();
clearIncoming(req);
// If the user never called req.read(), and didn't pipe() or
// .resume() or .on('data'), then we call req._dump() so that the
// bytes will be pulled off the wire.
if (!req._consuming && !req._readableState.resumeScheduled)
req._dump();
// Unbind the response from the socket before signalling 'close'.
res.detachSocket(socket);
req.emit('close');
process.nextTick(emitCloseNT, res);
if (res._last) {
// The response was flagged as the last one on this connection: shut the
// socket down, preferring destroySoon() when the socket provides it.
if (typeof socket.destroySoon === 'function') {
socket.destroySoon();
} else {
socket.end();
}
} else if (state.outgoing.length === 0) {
// No response is waiting: arm the keep-alive timeout, if one is configured
// and the socket supports setTimeout().
if (server.keepAliveTimeout && typeof socket.setTimeout === 'function') {
socket.setTimeout(server.keepAliveTimeout);
state.keepAliveTimeoutSet = true;
}
} else {
// Start sending the next message
const m = state.outgoing.shift();
if (m) {
m.assignSocket(socket);
}
}
}
在最后会取出队列中等待的outgoing继续复用socket处理。
至此就明白了outgoingMessage之间是如何被socket管理的。简单概括,每一个连接对应一个socket,并配有一个parser;parser在该连接上每解析出一个IncomingMessage,就会基于它构造出对应的outgoingMessage。同一连接上的多个outgoingMessage以队列的形式缓冲,依次复用这个socket输出。
那么继续看outgoingMessage内部是如何消费数据的。当拿到res时,一般会通过writeHead方法写入响应头。这个方法实现在ServerResponse的原型上。
// Excerpt of writeHead: all argument handling is elided. The visible tail
// passes `statusLine` and `headers` (both produced in the elided part) to
// `_storeHeader()` and returns `this`.
function writeHead(statusCode, reason, obj) {
// ...
this._storeHeader(statusLine, headers);
return this;
}
省略一系列参数和特殊情况处理,最终会调用_storeHeader存储我们的响应头,这个实现就要进入outgoingMessage了。
// First part of _storeHeader (the rest is elided in this excerpt). It creates
// a scratch `state` object seeded with `firstLine` and feeds every header
// through processHeader(), which receives that same `state` object.
function _storeHeader(firstLine, headers) {
// firstLine in the case of request is: 'GET /index.html HTTP/1.1\r\n'
// in the case of response it is: 'HTTP/1.1 200 OK\r\n'
// Boolean flags all start as false; `header` starts as the first line.
const state = {
connection: false,
contLen: false,
te: false,
date: false,
expect: false,
trailer: false,
header: firstLine
};
if (headers) {
if (headers === this[kOutHeaders]) {
// The message's own stored header map: each value is indexed as
// entry[0] / entry[1] and is passed on with validation turned off.
for (const key in headers) {
const entry = headers[key];
processHeader(this, state, entry[0], entry[1], false);
}
} else if (ArrayIsArray(headers)) {
// Array input: each element is indexed as entry[0] / entry[1]; validated.
for (const entry of headers) {
processHeader(this, state, entry[0], entry[1], true);
}
} else {
// Any other object: own enumerable keys only; validated.
for (const key in headers) {
if (ObjectPrototypeHasOwnProperty(headers, key)) {
processHeader(this, state, key, headers[key], true);
}
}
}
}
// ...
}
内容比较多,一步一步看。首先构造一个关于header的state对象初始化一些信息。再根据headers的类型调用processHeader加工。
// Normalises one header before it is stored. The name is validated when
// `validate` is set. A non-array value is stored as is; an array value is
// stored element by element, except that an array of two or more values for
// a cookie field is joined with '; ' and stored as a single header.
function processHeader(self, state, key, value, validate) {
  if (validate) {
    validateHeaderName(key);
  }

  if (!ArrayIsArray(value)) {
    storeHeader(self, state, key, value, validate);
    return;
  }

  const joinAsCookie = value.length >= 2 && isCookieField(key);
  if (joinAsCookie) {
    storeHeader(self, state, key, value.join('; '), validate);
    return;
  }

  // Retain for(;;) loop for performance reasons
  // Refs: https://github.com/nodejs/node/pull/30958
  for (let idx = 0; idx < value.length; idx++) {
    storeHeader(self, state, key, value[idx], validate);
  }
}
验证后通过storeHeader拼接到state的header上。
// Appends one "name: value" line (terminated by CRLF) to `state.header`,
// validating the value first when `validate` is set, and then passes the
// pair to matchHeader().
function storeHeader(self, state, key, value, validate) {
  if (validate) {
    validateHeaderValue(key, value);
  }

  const headerLine = key + ': ' + value + CRLF;
  state.header += headerLine;

  matchHeader(self, state, key, value);
}
再调用matchHeader根据header设置state信息。state相当于一层中转,之后根据state的信息,就可以继续拼接一些比较复杂的header了。
let { header } = state;
// Date header
if (this.sendDate && !state.date) {
header += 'Date: ' + utcDate() + CRLF;
}
// Force the connection to close when the response is a 204 No Content or
// a 304 Not Modified and the user has set a "Transfer-Encoding: chunked"
// header.
//
// RFC 2616 mandates that 204 and 304 responses MUST NOT have a body but
// node.js used to send out a zero chunk anyway to accommodate clients
// that don't have special handling for those responses.
//
// It was pointed out that this might confuse reverse proxies to the point
// of creating security liabilities, so suppress the zero chunk and force
// the connection to close.
if (this.chunkedEncoding && (this.statusCode === 204 ||
this.statusCode === 304)) {
debug(this.statusCode + ' response should not use chunked encoding,' +
' closing connection.');
this.chunkedEncoding = false;
this.shouldKeepAlive = false;
}
// keep-alive logic
if (this._removedConnection) {
this._last = true;
this.shouldKeepAlive = false;
} else if (!state.connection) {
const shouldSendKeepAlive = this.shouldKeepAlive &&
(state.contLen || this.useChunkedEncodingByDefault || this.agent);
if (shouldSendKeepAlive) {
header += 'Connection: keep-alive\r\n';
} else {
this._last = true;
header += 'Connection: close\r\n';
}
}
if (!state.contLen && !state.te) {
if (!this._hasBody) {
// Make sure we don't end the 0\r\n\r\n at the end of the message.
this.chunkedEncoding = false;
} else if (!this.useChunkedEncodingByDefault) {
this._last = true;
} else if (!state.trailer &&
!this._removedContLen &&
typeof this._contentLength === 'number') {
header += 'Content-Length: ' + this._contentLength + CRLF;
} else if (!this._removedTE) {
header += 'Transfer-Encoding: chunked\r\n';
this.chunkedEncoding = true;
} else {
// We should only be able to get here if both Content-Length and
// Transfer-Encoding are removed by the user.
// See: test/parallel/test-http-remove-header-stays-removed.js
debug('Both Content-Length and Transfer-Encoding are removed');
}
}
// Test non-chunked message does not have trailer header set,
// message will be terminated by the first empty line after the
// header fields, regardless of the header fields present in the
// message, and thus cannot contain a message body or 'trailers'.
if (this.chunkedEncoding !== true && state.trailer) {
throw new ERR_HTTP_TRAILER_INVALID();
}
this._header = header + CRLF;
this._headerSent = false;
// Wait until the first body chunk, or close(), is sent to flush,
// UNLESS we're sending Expect: 100-continue.
if (state.expect) this._send('');
最终处理完成后的header会设置到_header这个属性上。注意到最后的注释,如果请求带有Expect: 100-continue,就会立即通过_send把header写入socket;这也说明正常情况下,header处理完后是不会直接返回到客户端的,而是要等到第一个响应体chunk写入或者close()被调用时才一起发出。那么我们就继续看写入响应体的手段——write。
// Public write(): delegates to write_() with `fromEnd` set to false. When
// the result is falsy, the message is flagged as needing a later 'drain'.
// The result of write_() is returned unchanged.
OutgoingMessage.prototype.write = function write(chunk, encoding, callback) {
  const accepted = write_(this, chunk, encoding, callback, false);

  if (accepted) {
    return accepted;
  }

  this[kNeedDrain] = true;
  return accepted;
};
这里会调用write_得到ret,当ret不为真时,说明数据还在内存中排队,并未刷新到内核缓冲区,就需要设置kNeedDrain等待触发drain事件。这里涉及到stream的知识先不细讲,总之当触发drain事件时,代表我们可以继续调用write写入数据。
// Shared implementation behind write(); `fromEnd` is false for the write()
// call shown in this file. Returns true for the two short-circuit cases
// (message already finished, or message without a body); otherwise returns
// what msg._send() returns for the last piece written.
function write_(msg, chunk, encoding, callback, fromEnd) {
// Writing after the message has finished is delegated to writeAfterEnd().
if (msg.finished) {
writeAfterEnd(msg, callback);
return true;
}
// No header has been built yet: build the implicit one first.
if (!msg._header) {
msg._implicitHeader();
}
// Messages without a body ignore the chunk; the callback still runs, on the
// next tick.
if (!msg._hasBody) {
debug('This type of response MUST NOT have a body. ' +
'Ignoring write() calls.');
if (callback) process.nextTick(callback);
return true;
}
// The chunk type is only checked when the call does not come from end().
if (!fromEnd && typeof chunk !== 'string' && !(chunk instanceof Buffer)) {
throw new ERR_INVALID_ARG_TYPE('first argument',
['string', 'Buffer'], chunk);
}
// Cork the socket if it is not corked already and schedule
// connectionCorkNT for the next tick.
if (!fromEnd && msg.socket && !msg.socket.writableCorked) {
msg.socket.cork();
process.nextTick(connectionCorkNT, msg.socket);
}
let ret;
if (msg.chunkedEncoding && chunk.length !== 0) {
// Chunked framing: hexadecimal byte length, CRLF, the data, CRLF. Only the
// final piece carries the user's callback.
let len;
if (typeof chunk === 'string')
len = Buffer.byteLength(chunk, encoding);
else
len = chunk.length;
msg._send(len.toString(16), 'latin1', null);
msg._send(crlf_buf, null, null);
msg._send(chunk, encoding, null);
ret = msg._send(crlf_buf, null, callback);
} else {
// Not chunked, or an empty chunk: send the data as is.
ret = msg._send(chunk, encoding, callback);
}
debug('write ret = ' + ret);
return ret;
}
除了特殊情况和错误处理外,值得注意的有区分分块传输的逻辑。不过最终都还是会调用_send。
// Single funnel for outgoing data: everything ends up in _writeRaw(), which
// either writes to the socket or buffers. Before the first piece of data,
// the serialized header is put in front of it.
OutgoingMessage.prototype._send = function _send(data, encoding, callback) {
  // Hack to get the headers and the first body chunk onto the same packet;
  // later versions of Node are meant to solve this at a lower level and in
  // a more general way.
  if (!this._headerSent) {
    const canPrependToString = typeof data === 'string' &&
      (!encoding || encoding === 'utf8' || encoding === 'latin1');

    if (canPrependToString) {
      // String data in a compatible encoding: just prefix the header text.
      data = this._header + data;
    } else {
      // Otherwise queue the header as its own latin1 entry at the very
      // front of the pending output and account for its size.
      const head = this._header;
      const headEntry = {
        data: head,
        encoding: 'latin1',
        callback: null
      };

      if (this.outputData.length === 0) {
        this.outputData = [headEntry];
      } else {
        this.outputData.unshift(headEntry);
      }

      this.outputSize += head.length;
      this._onPendingData(head.length);
    }

    this._headerSent = true;
  }

  return this._writeRaw(data, encoding, callback);
};
注释提到,通过这个方法把数据写到socket或缓冲区,并且有一个hack把headers和第一个chunk绑到同一个packet里。再调用_writeRaw。
// Lowest-level write of an outgoing message. Returns false when the socket
// is already destroyed. Writes straight to the socket when this message
// currently owns a writable socket (flushing queued output first); in every
// other case the data is queued and the return value tells whether the
// queued size is still below HIGH_WATER_MARK.
function _writeRaw(data, encoding, callback) {
  const sock = this.socket;

  if (sock && sock.destroyed) {
    // The socket was destroyed. If we're still trying to write to it,
    // then we haven't gotten the 'close' event yet.
    return false;
  }

  // Support the (data, callback) call shape.
  if (typeof encoding === 'function') {
    callback = encoding;
    encoding = null;
  }

  const ownsWritableSocket =
    sock && sock._httpMessage === this && sock.writable;

  if (!ownsWritableSocket) {
    // Buffer, as long as we're not destroyed.
    this.outputData.push({ data, encoding, callback });
    this.outputSize += data.length;
    this._onPendingData(data.length);
    return this.outputSize < HIGH_WATER_MARK;
  }

  // Anything still queued has to go out before the new data.
  if (this.outputData.length) {
    this._flushOutput(sock);
  }

  return sock.write(data, encoding, callback);
}
当socket可写时,先通过_flushOutput把缓冲区的数据刷出来,之后再直接写入。如果不可写,就先缓冲起来。到这里我们就完成了一次服务端到客户端信息通过socket的传输,待客户端接收即可。
最后看响应的终止信号end方法。
// Tail of end() (the handling of `chunk` / `encoding` is elided in this
// excerpt). It sends the terminating data, uncorks the socket, marks the
// message as finished and, when nothing is left queued, calls _finish().
// Returns `this`.
OutgoingMessage.prototype.end = function end(chunk, encoding, callback) {
// ...
// A user callback runs once, on 'finish'.
if (typeof callback === 'function')
this.once('finish', callback);
// onFinish pre-bound to this message; passed as the callback of the final
// _send() below.
const finish = onFinish.bind(undefined, this);
if (this._hasBody && this.chunkedEncoding) {
// Chunked body: send the zero-length chunk, the trailer and the final CRLF.
this._send('0\r\n' + this._trailer + '\r\n', 'latin1', finish);
} else {
// Force a flush, HACK.
this._send('', 'latin1', finish);
}
if (this.socket) {
// Fully uncork connection on end().
this.socket._writableState.corked = 1;
this.socket.uncork();
}
this[kCorked] = 0;
this.finished = true;
// There is the first message on the outgoing queue, and we've sent
// everything to the socket.
debug('outgoing message end.');
// Finish right away only if nothing is queued and the socket is currently
// bound to this message.
if (this.outputData.length === 0 &&
this.socket &&
this.socket._httpMessage === this) {
this._finish();
}
return this;
};
有一大段把end当作write使用的逻辑省略掉了,这里主要做的事就是触发finish事件,并将socket的缓冲数据输出。抛出的finish事件会在ServerResponse的resOnFinish里接住,上篇文章也讲过,在resOnFinish里做的事就是维护incoming和outgoing队列了。如果当前res是该连接上的最后一个响应(res._last为真),就会销毁socket;如果outgoing队列已空,则按keepAliveTimeout给socket设置超时,等待后续请求;否则取出队列中的下一个res,复用socket继续下一轮的数据处理。至此outgoing内的一次数据流转就处理完成了。
IncomingMessage
再看在服务端是如何处理IncomingMessage的,想想我们是什么时候能拿到req的,触发request事件时,也就是parser解析完header的时候。在_http_common.js
中找到parserOnHeadersComplete函数。
const ParserIncomingMessage = (socket && socket.server &&
socket.server[kIncomingMessage]) ||
IncomingMessage;
const incoming = parser.incoming = new ParserIncomingMessage(socket);
这里得到的incoming就是req了。进到_http_incoming.js看实现。
/* Abstract base class for ServerRequest and ClientResponse. */
// Constructor: initialises the Readable part and then sets every field of the
// message to its empty / null starting value.
function IncomingMessage(socket) {
// When a socket is given, its readableHighWaterMark is passed on as this
// stream's highWaterMark; otherwise Readable is called without options.
let streamOptions;
if (socket) {
streamOptions = {
highWaterMark: socket.readableHighWaterMark
};
}
Stream.Readable.call(this, streamOptions);
this._readableState.readingMore = true;
this.socket = socket;
// HTTP version fields, unset at construction.
this.httpVersionMajor = null;
this.httpVersionMinor = null;
this.httpVersion = null;
this.complete = false;
// Header and trailer containers, plus their raw counterparts.
this.headers = {};
this.rawHeaders = [];
this.trailers = {};
this.rawTrailers = [];
this.readable = true;
this.aborted = false;
this.upgrade = null;
// request (server) only
this.url = '';
this.method = null;
// response (client) only
this.statusCode = null;
this.statusMessage = null;
// Second reference to the same socket object.
this.client = socket;
this._consuming = false;
// Flag for when we decide that this message cannot possibly be
// read by the user, so there's no point continuing to handle it.
this._dumped = false;
}
// Instances inherit from Stream.Readable.prototype, and the constructor
// function itself inherits from Stream.Readable.
ObjectSetPrototypeOf(IncomingMessage.prototype, Stream.Readable.prototype);
ObjectSetPrototypeOf(IncomingMessage, Stream.Readable);
同样继承了Stream,不过要注意只继承了可读的部分。同时注意到根据是req还是res的incoming有不同的属性初始化,也说明了incoming是在两端都有用到的。
回到parserOnHeadersComplete,这里通过parser解析的header肯定要存到incomingMessage中。关键在这一行。
incoming._addHeaderLines(headers, n);
到incoming中看实现。
// Stores a flat [name, value, name, value, ...] list on the message. Once
// the message is complete the list is kept as raw trailers and filled into
// `trailers`; before that it is kept as raw headers and filled into
// `headers`. Only the first `n` entries are walked, two at a time. Empty or
// missing lists are ignored.
function _addHeaderLines(headers, n) {
  if (!headers || !headers.length) {
    return;
  }

  const isTrailer = this.complete;
  if (isTrailer) {
    this.rawTrailers = headers;
  } else {
    this.rawHeaders = headers;
  }
  const target = isTrailer ? this.trailers : this.headers;

  for (let pos = 0; pos < n; pos += 2) {
    const field = headers[pos];
    const fieldValue = headers[pos + 1];
    this._addHeaderLine(field, fieldValue, target);
  }
}
继续调用_addHeaderLine针对每条header写入,最终会添加到headers属性中,我们就可以直接从req中拿到了。继续看请求体的获取。请求体就不能直接拿了,而是要监听stream的data事件,那么还是先要搞清楚请求体是什么时候被写入incoming的。还记得parser里关于请求体解析的回调吗?就是这里。
parser[kOnBody] = parserOnBody
当通过socket的请求体来到时,parser通过c++代码调用这个回调。
// Body-data callback of the parser. Looks up the kOnBody property on the JS
// object, and if it is a function calls it with (buffer, offset, length).
// Returns 0 normally, or HPE_USER when the JS callback produced no result.
int on_body(const char* at, size_t length) {
EscapableHandleScope scope(env()->isolate());
Local<Object> obj = object();
Local<Value> cb = obj->Get(env()->context(), kOnBody).ToLocalChecked();
// No JS handler installed: nothing to do.
if (!cb->IsFunction())
return 0;
// We came from consumed stream
if (current_buffer_.IsEmpty()) {
// Make sure Buffer will be in parent HandleScope
current_buffer_ = scope.Escape(Buffer::Copy(
env()->isolate(),
current_buffer_data_,
current_buffer_len_).ToLocalChecked());
}
// Arguments: the buffer, the offset of `at` relative to
// current_buffer_data_, and the byte length of this body piece.
Local<Value> argv[3] = {
current_buffer_,
Integer::NewFromUnsigned(env()->isolate(), at - current_buffer_data_),
Integer::NewFromUnsigned(env()->isolate(), length)
};
MaybeLocal<Value> r = MakeCallback(cb.As<Function>(),
arraysize(argv),
argv);
// An empty result is recorded as a JS exception and reported to the parser
// through the error reason and the HPE_USER return code.
if (r.IsEmpty()) {
got_exception_ = true;
llhttp_set_error_reason(&parser_, "HPE_JS_EXCEPTION:JS Exception");
return HPE_USER;
}
return 0;
}
进入其实现。
// Parser callback for a piece of body data: `b` is the buffer, `start` and
// `len` select the bytes inside it. The selected slice is pushed into the
// parser's current incoming message; when push() returns a falsy value,
// reading from the socket is stopped.
function parserOnBody(b, start, len) {
  const incoming = this.incoming;

  // If the stream has already been removed, then drop it.
  if (incoming === null) {
    return;
  }

  // Nothing to deliver for an empty piece or a dumped message.
  if (!(len > 0) || incoming._dumped) {
    return;
  }

  // Pretend this was the result of a stream._read call.
  const accepted = incoming.push(b.slice(start, start + len));
  if (!accepted) {
    readStop(this.socket);
  }
}
通过stream推入,即可触发data事件获取请求体数据。再回到incoming文件中,看其内部的_read
方法。
// Readable hook. On the first call it marks the message as being consumed
// and clears `readingMore`; on every call it restarts reading on the socket
// if the socket is readable.
IncomingMessage.prototype._read = function _read(n) {
  const firstRead = !this._consuming;
  if (firstRead) {
    this._readableState.readingMore = false;
    this._consuming = true;
  }

  // We actually do almost nothing here, because the parserOnBody
  // function fills up our internal buffer directly. However, we
  // do need to unpause the underlying socket so that it flows.
  const sock = this.socket;
  if (sock.readable) {
    readStart(sock);
  }
};
注释说的很明白,这个函数几乎什么都不用做,因为通过parserOnBody就拿到数据了,验证了上一步。到这里关于Server的outgoing和incoming的使用就结束了。最后再看看客户端的使用。
Client
首先是OutgoingMessage,ClientRequest在构造时会继承它,这一点前面已经说过了。不过要提一点:在connection初始化时,会通过tickOnSocket这个回调让socket、req、parser三者相互引用,保证各自都能取到另外两个。
// Start of tickOnSocket (the rest is elided in this excerpt). It takes a
// parser from `parsers`, initialises it as a response parser for `req`, and
// links request, socket and parser to each other.
function tickOnSocket(req, socket) {
const parser = parsers.alloc();
req.socket = socket;
// The third argument falls back to 0 when req.maxHeaderSize is falsy; the
// fourth uses isLenient() only when req.insecureHTTPParser is undefined.
parser.initialize(HTTPParser.RESPONSE,
new HTTPClientAsyncResource('HTTPINCOMINGMESSAGE', req),
req.maxHeaderSize || 0,
req.insecureHTTPParser === undefined ?
isLenient() : req.insecureHTTPParser);
// Cross-references: parser -> socket and request, request -> parser,
// socket -> parser and request.
parser.socket = socket;
parser.outgoing = req;
req.parser = parser;
socket.parser = parser;
socket._httpMessage = req;
// ...
}
到这里Client端的req也就有了借助outgoing的缓冲机制往server输出数据的能力了。再看Client如何处理Incoming。对应的,同样是在parser的onIncoming回调中。
parser.onIncoming = parserOnIncomingClient;
onIncoming的调用同样是在_http_common.js
的parserOnHeadersComplete这个函数中,是两端复用的就不再解释了。client的res也就有了incoming读数据的能力。
小结
网络模块终于读得差不多了,还差tls/http2/https 等更上层的模块,不过也都是基于tcp,udp,http的思路之上。在读这些源码之前,还是先把这篇文章涉及到的更基础,更重要的stream模块理解。
-- EOF --