From 9333b379aa4309ad91255f5ff7cfd40fbdae06ed Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Sun, 17 Jan 2016 06:11:42 +0000 Subject: [PATCH 1/2] lib.blit: Added "blitter" module This is a simple placeholder implementation for an optimized bit-blitting API. --- src/lib/blit.lua | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/lib/blit.lua diff --git a/src/lib/blit.lua b/src/lib/blit.lua new file mode 100644 index 0000000000..ce4a31ff1f --- /dev/null +++ b/src/lib/blit.lua @@ -0,0 +1,33 @@ +-- blit.lua - offload engine for memory operations + +module(..., package.seeall) + +local ffi = require("ffi") + +-- The blit module provides a "blitter" operation to offload +-- performance-critical memory operations. The API allows scheduling a +-- series of operations that can be performed at any time and in any +-- order, and then executing a "barrier" to wait for completion. + +-- The implementation in this file is very basic but could be extended +-- in the future to take advantage of the flexibility afforded by the +-- API to perform special optimizations (for example parallel memory +-- copies to amortize cache latency, etc). + +function copy (dst, src, len) + -- Trivial implementation: immediately copy len bytes from src to dst. + ffi.copy(dst, src, len) +end + +-- Wait until all scheduled copies have completed. +function barrier () + -- No-op because copy() has already executed every copy eagerly. +end + +function selftest () + print("selftest: blit") + -- It would be valuable to have an extensive selftest function to + -- make it easy to develop and test new optimized blitter + -- implementations. + print("selftest: ok") +end From b93121383ab8444edb995e26e4a0c8b8ad25488f Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Sun, 17 Jan 2016 06:12:56 +0000 Subject: [PATCH 2/2] (WIP) virtio net_device: Perform VM copies with lib.blit Update the vhost-user code to perform Snabb<->VM memory copies via the lib.blit module.
This allows experimental optimizations with local changes to the blit module. This essentially separates "virtio vring processing" and "virtio memory copies" into being two separate problems that can be profiled and optimized separately. This is work-in-progress: Care must be taken not to let the guest see that packets are available until the blit.barrier() operation has been executed and I think this will require moving the ring index updates. --- src/lib/virtio/net_device.lua | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/lib/virtio/net_device.lua b/src/lib/virtio/net_device.lua index 58b040be30..4cf0381989 100644 --- a/src/lib/virtio/net_device.lua +++ b/src/lib/virtio/net_device.lua @@ -11,6 +11,7 @@ local packet = require("core.packet") local timer = require("core.timer") local vq = require("lib.virtio.virtq") local checksum = require("lib.checksum") +local blit = require("lib.blit") local ffi = require("ffi") local C = ffi.C local band = bit.band @@ -106,6 +107,7 @@ end function VirtioNetDevice:poll_vring_receive () -- RX self:receive_packets_from_vm() + blit.barrier() self:rx_signal_used() end @@ -139,7 +141,8 @@ function VirtioNetDevice:rx_buffer_add(rx_p, addr, len) local addr = self:map_from_guest(addr) local pointer = ffi.cast(char_ptr_t, addr) - packet.append(rx_p, pointer, len) + blit.copy(rx_p.data + rx_p.length, pointer, len) + rx_p.length = rx_p.length + len return len end @@ -174,6 +177,7 @@ end function VirtioNetDevice:poll_vring_transmit () -- RX self:transmit_packets_to_vm() + blit.barrier() self:tx_signal_used() end @@ -239,7 +243,7 @@ function VirtioNetDevice:tx_buffer_add(tx_p, addr, len) local pointer = ffi.cast(char_ptr_t, addr) assert(tx_p.length <= len) - ffi.copy(pointer, tx_p.data, tx_p.length) + blit.copy(pointer, tx_p.data, tx_p.length) return tx_p.length end @@ -288,7 +292,7 @@ function VirtioNetDevice:tx_buffer_add_mrg_rxbuf(tx_p, addr, len) local to_copy = math.min(tx_p.length - self.tx.data_sent, len + adjust) --
copy the data to the adjusted pointer - ffi.copy(pointer - adjust, tx_p.data + self.tx.data_sent, to_copy) + blit.copy(pointer - adjust, tx_p.data + self.tx.data_sent, to_copy) -- update the num_buffers in the first virtio header self.tx.tx_mrg_hdr[0].num_buffers = self.tx.tx_mrg_hdr[0].num_buffers + 1