From 69bafce8077bc3515eb8a1b2ac8f87dd39cf6838 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 21 Jul 2016 19:06:37 +0800 Subject: [PATCH 01/75] devicetree: bindings: IB: Add binding document for Hisilicon RoCE This patch added DTS binding document for Hisilicon RoCE driver. Signed-off-by: Wei Hu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- .../infiniband/hisilicon-hns-roce.txt | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt diff --git a/Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt b/Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt new file mode 100644 index 00000000000000..f97993be2dcbdc --- /dev/null +++ b/Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt @@ -0,0 +1,107 @@ +Hisilicon RoCE DT description + +Hisilicon RoCE engine is a part of network subsystem. +It works depending on other part of network wubsytem, such as, gmac and +dsa fabric. + +Additional properties are described here: + +Required properties: +- compatible: Should contain "hisilicon,hns-roce-v1". +- reg: Physical base address of the RoCE driver and +length of memory mapped region. +- eth-handle: phandle, specifies a reference to a node +representing a ethernet device. +- dsaf-handle: phandle, specifies a reference to a node +representing a dsaf device. +- #address-cells: must be 2 +- #size-cells: must be 2 +Optional properties: +- dma-coherent: Present if DMA operations are coherent. +- interrupt-parent: the interrupt parent of this device. +- interrupts: should contain 32 completion event irq,1 async event irq +and 1 event overflow irq. +- interrupt-names:should be one of 34 irqs for roce device + - hns-roce-comp-0 ~ hns-roce-comp-31: 32 complete event irq + - hns-roce-async: 1 async event irq + - hns-roce-common: named common exception warning irq +Example: + infiniband@c4000000 { + compatible = "hisilicon,hns-roce-v1"; + reg = <0x0 0xc4000000 0x0 0x100000>; + dma-coherent; + eth-handle = <ð2 ð3 ð4 ð5 ð6 ð7>; + dsaf-handle = <&soc0_dsa>; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mbigen_dsa>; + interrupts = <722 1>, + <723 1>, + <724 1>, + <725 1>, + <726 1>, + <727 1>, + <728 1>, + <729 1>, + <730 1>, + <731 1>, + <732 1>, + <733 1>, + <734 1>, + <735 1>, + <736 1>, + <737 1>, + <738 1>, + <739 1>, + <740 1>, + <741 1>, + <742 1>, + <743 1>, + <744 1>, + <745 1>, + <746 1>, + <747 1>, + <748 1>, + <749 1>, + <750 1>, + <751 1>, + <752 1>, + <753 1>, + <785 1>, + <754 4>; + + interrupt-names = "hns-roce-comp-0", + "hns-roce-comp-1", + "hns-roce-comp-2", + "hns-roce-comp-3", + "hns-roce-comp-4", + "hns-roce-comp-5", + "hns-roce-comp-6", + "hns-roce-comp-7", + "hns-roce-comp-8", + "hns-roce-comp-9", + "hns-roce-comp-10", + "hns-roce-comp-11", + "hns-roce-comp-12", + "hns-roce-comp-13", + "hns-roce-comp-14", + "hns-roce-comp-15", + "hns-roce-comp-16", + "hns-roce-comp-17", + "hns-roce-comp-18", + "hns-roce-comp-19", + "hns-roce-comp-20", + "hns-roce-comp-21", + "hns-roce-comp-22", + "hns-roce-comp-23", + "hns-roce-comp-24", + "hns-roce-comp-25", + "hns-roce-comp-26", + "hns-roce-comp-27", + "hns-roce-comp-28", + "hns-roce-comp-29", + "hns-roce-comp-30", + "hns-roce-comp-31", + "hns-roce-async", + "hns-roce-common"; + }; From 9a4435375cd151e07c0c38fa601b00115986091b Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 21 Jul 2016 19:06:38 +0800 Subject: [PATCH 02/75] IB/hns: Add driver files for hns RoCE driver These are the various new source code files for the Hisilicon RoCE driver for ARM architecture. Signed-off-by: Wei Hu Signed-off-by: Nenglong Zhao Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_ah.c | 128 + drivers/infiniband/hw/hns/hns_roce_alloc.c | 257 ++ drivers/infiniband/hw/hns/hns_roce_cmd.c | 368 +++ drivers/infiniband/hw/hns/hns_roce_cmd.h | 80 + drivers/infiniband/hw/hns/hns_roce_common.h | 325 +++ drivers/infiniband/hw/hns/hns_roce_cq.c | 446 +++ drivers/infiniband/hw/hns/hns_roce_device.h | 734 +++++ drivers/infiniband/hw/hns/hns_roce_eq.c | 762 +++++ drivers/infiniband/hw/hns/hns_roce_eq.h | 130 + drivers/infiniband/hw/hns/hns_roce_hem.c | 476 ++++ drivers/infiniband/hw/hns/hns_roce_hem.h | 131 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2781 +++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 981 +++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 1059 +++++++ drivers/infiniband/hw/hns/hns_roce_mr.c | 614 ++++ drivers/infiniband/hw/hns/hns_roce_pd.c | 144 + drivers/infiniband/hw/hns/hns_roce_qp.c | 855 ++++++ drivers/infiniband/hw/hns/hns_roce_user.h | 53 + 18 files changed, 10324 insertions(+) create mode 100644 drivers/infiniband/hw/hns/hns_roce_ah.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_alloc.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_cmd.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_cmd.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_common.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_cq.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_device.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_eq.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_eq.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_hem.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_hem.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_hw_v1.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_hw_v1.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_main.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_mr.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_pd.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_qp.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_user.h diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c new file mode 100644 index 00000000000000..24f79ee39fdf56 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "hns_roce_device.h" + +#define HNS_ROCE_PORT_NUM_SHIFT 24 +#define HNS_ROCE_VLAN_SL_BIT_MASK 7 +#define HNS_ROCE_VLAN_SL_SHIFT 13 + +struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + struct device *dev = &hr_dev->pdev->dev; + struct ib_gid_attr gid_attr; + struct hns_roce_ah *ah; + u16 vlan_tag = 0xffff; + struct in6_addr in6; + union ib_gid sgid; + int ret; + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + /* Get mac address */ + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw)); + if (rdma_is_multicast_addr(&in6)) + rdma_get_mcast_mac(&in6, ah->av.mac); + else + memcpy(ah->av.mac, ah_attr->dmac, sizeof(ah_attr->dmac)); + + /* Get source gid */ + ret = ib_get_cached_gid(ibpd->device, ah_attr->port_num, + ah_attr->grh.sgid_index, &sgid, &gid_attr); + if (ret) { + dev_err(dev, "get sgid failed! ret = %d\n", ret); + kfree(ah); + return ERR_PTR(ret); + } + + if (gid_attr.ndev) { + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + dev_put(gid_attr.ndev); + } + + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & HNS_ROCE_VLAN_SL_BIT_MASK) << + HNS_ROCE_VLAN_SL_SHIFT; + + ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | (ah_attr->port_num << + HNS_ROCE_PORT_NUM_SHIFT)); + ah->av.gid_index = ah_attr->grh.sgid_index; + ah->av.vlan = cpu_to_le16(vlan_tag); + dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, + ah->av.vlan); + + if (ah_attr->static_rate) + ah->av.stat_rate = IB_RATE_10_GBPS; + + memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, HNS_ROCE_GID_SIZE); + ah->av.sl_tclass_flowlabel = cpu_to_le32(ah_attr->sl << + HNS_ROCE_SL_SHIFT); + + return &ah->ibah; +} + +int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct hns_roce_ah *ah = to_hr_ah(ibah); + + memset(ah_attr, 0, sizeof(*ah_attr)); + + ah_attr->sl = le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_SL_SHIFT; + ah_attr->port_num = le32_to_cpu(ah->av.port_pd) >> + HNS_ROCE_PORT_NUM_SHIFT; + ah_attr->static_rate = ah->av.stat_rate; + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.traffic_class = le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_TCLASS_SHIFT; + ah_attr->grh.flow_label = le32_to_cpu(ah->av.sl_tclass_flowlabel) & + HNS_ROCE_FLOW_LABLE_MASK; + ah_attr->grh.hop_limit = ah->av.hop_limit; + ah_attr->grh.sgid_index = ah->av.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, HNS_ROCE_GID_SIZE); + + return 0; +} + +int hns_roce_destroy_ah(struct ib_ah *ah) +{ + kfree(to_hr_ah(ah)); + + return 0; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c new file mode 100644 index 00000000000000..863a17a2de4072 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "hns_roce_device.h" + +int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) +{ + int ret = 0; + + spin_lock(&bitmap->lock); + *obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); + if (*obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + *obj = find_first_zero_bit(bitmap->table, bitmap->max); + } + + if (*obj < bitmap->max) { + set_bit(*obj, bitmap->table); + bitmap->last = (*obj + 1); + if (bitmap->last == bitmap->max) + bitmap->last = 0; + *obj |= bitmap->top; + } else { + ret = -1; + } + + spin_unlock(&bitmap->lock); + + return ret; +} + +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj) +{ + hns_roce_bitmap_free_range(bitmap, obj, 1); +} + +int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, + int align, unsigned long *obj) +{ + int ret = 0; + int i; + + if (likely(cnt == 1 && align == 1)) + return hns_roce_bitmap_alloc(bitmap, obj); + + spin_lock(&bitmap->lock); + + *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, + bitmap->last, cnt, align - 1); + if (*obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, 0, + cnt, align - 1); + } + + if (*obj < bitmap->max) { + for (i = 0; i < cnt; i++) + set_bit(*obj + i, bitmap->table); + + if (*obj == bitmap->last) { + bitmap->last = (*obj + cnt); + if (bitmap->last >= bitmap->max) + bitmap->last = 0; + } + *obj |= bitmap->top; + } else { + ret = -1; + } + + spin_unlock(&bitmap->lock); + + return ret; +} + +void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, + unsigned long obj, int cnt) +{ + int i; + + obj &= bitmap->max + bitmap->reserved_top - 1; + + spin_lock(&bitmap->lock); + for (i = 0; i < cnt; i++) + clear_bit(obj + i, bitmap->table); + + bitmap->last = min(bitmap->last, obj); + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + spin_unlock(&bitmap->lock); +} + +int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 reserved_top) +{ + u32 i; + + if (num != roundup_pow_of_two(num)) + return -EINVAL; + + bitmap->last = 0; + bitmap->top = 0; + bitmap->max = num - reserved_top; + bitmap->mask = mask; + bitmap->reserved_top = reserved_top; + spin_lock_init(&bitmap->lock); + bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long), + GFP_KERNEL); + if (!bitmap->table) + return -ENOMEM; + + for (i = 0; i < reserved_bot; ++i) + set_bit(i, bitmap->table); + + return 0; +} + +void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) +{ + kfree(bitmap->table); +} + +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, + struct hns_roce_buf *buf) +{ + int i; + struct device *dev = &hr_dev->pdev->dev; + u32 bits_per_long = BITS_PER_LONG; + + if (buf->nbufs == 1) { + dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); + } else { + if (bits_per_long == 64) + vunmap(buf->direct.buf); + + for (i = 0; i < buf->nbufs; ++i) + if (buf->page_list[i].buf) + dma_free_coherent(&hr_dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); + } +} + +int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, + struct hns_roce_buf *buf) +{ + int i = 0; + dma_addr_t t; + struct page **pages; + struct device *dev = &hr_dev->pdev->dev; + u32 bits_per_long = BITS_PER_LONG; + + /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ + if (size <= max_direct) { + buf->nbufs = 1; + /* Npages calculated by page_size */ + buf->npages = 1 << get_order(size); + buf->page_shift = PAGE_SHIFT; + /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ + buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + buf->direct.map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + + memset(buf->direct.buf, 0, size); + } else { + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; + buf->page_shift = PAGE_SHIFT; + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), + GFP_KERNEL); + + if (!buf->page_list) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; ++i) { + buf->page_list[i].buf = dma_alloc_coherent(dev, + PAGE_SIZE, &t, + GFP_KERNEL); + + if (!buf->page_list[i].buf) + goto err_free; + + buf->page_list[i].map = t; + memset(buf->page_list[i].buf, 0, PAGE_SIZE); + } + if (bits_per_long == 64) { + pages = kmalloc_array(buf->nbufs, sizeof(*pages), + GFP_KERNEL); + if (!pages) + goto err_free; + + for (i = 0; i < buf->nbufs; ++i) + pages[i] = virt_to_page(buf->page_list[i].buf); + + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, + PAGE_KERNEL); + kfree(pages); + if (!buf->direct.buf) + goto err_free; + } + } + + return 0; + +err_free: + hns_roce_buf_free(hr_dev, size, buf); + return -ENOMEM; +} + +void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) +{ + hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_cq_table(hr_dev); + hns_roce_cleanup_mr_table(hr_dev); + hns_roce_cleanup_pd_table(hr_dev); + hns_roce_cleanup_uar_table(hr_dev); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c new file mode 100644 index 00000000000000..2a0b6c05da5f5e --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" + +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define STATUS_MASK 0xff +#define CMD_TOKEN_MASK 0x1f +#define GO_BIT_TIMEOUT_MSECS 10000 + +enum { + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + HCR_GO_BIT = 15, +}; + +static int cmd_pending(struct hns_roce_dev *hr_dev) +{ + u32 status = readl(hr_dev->cmd.hcr + HCR_TOKEN_OFFSET); + + return (!!(status & (1 << HCR_GO_BIT))); +} + +/* this function should be serialized with "hcr_mutex" */ +static int __hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, + u64 in_param, u64 out_param, + u32 in_modifier, u8 op_modifier, u16 op, + u16 token, int event) +{ + struct hns_roce_cmdq *cmd = &hr_dev->cmd; + struct device *dev = &hr_dev->pdev->dev; + u32 __iomem *hcr = (u32 *)cmd->hcr; + int ret = -EAGAIN; + unsigned long end; + u32 val = 0; + + end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; + while (cmd_pending(hr_dev)) { + if (time_after(jiffies, end)) { + dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies, + (int)end); + goto out; + } + cond_resched(); + } + + roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + op); + roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); + roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + + __raw_writeq(cpu_to_le64(in_param), hcr + 0); + __raw_writeq(cpu_to_le64(out_param), hcr + 2); + __raw_writel(cpu_to_le32(in_modifier), hcr + 4); + /* Memory barrier */ + wmb(); + + __raw_writel(cpu_to_le32(val), hcr + 5); + + mmiowb(); + ret = 0; + +out: + return ret; +} + +static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, + u8 op_modifier, u16 op, u16 token, + int event) +{ + struct hns_roce_cmdq *cmd = &hr_dev->cmd; + int ret = -EAGAIN; + + mutex_lock(&cmd->hcr_mutex); + ret = __hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, token, + event); + mutex_unlock(&cmd->hcr_mutex); + + return ret; +} + +/* this should be called with "poll_sem" */ +static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, + unsigned long timeout) +{ + struct device *dev = &hr_dev->pdev->dev; + u8 __iomem *hcr = hr_dev->cmd.hcr; + unsigned long end = 0; + u32 status = 0; + int ret; + + ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + CMD_POLL_TOKEN, 0); + if (ret) { + dev_err(dev, "[cmd_poll]hns_roce_cmd_mbox_post_hw failed\n"); + goto out; + } + + end = msecs_to_jiffies(timeout) + jiffies; + while (cmd_pending(hr_dev) && time_before(jiffies, end)) + cond_resched(); + + if (cmd_pending(hr_dev)) { + dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n"); + ret = -ETIMEDOUT; + goto out; + } + + status = le32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_STATUS_OFFSET)); + if ((status & STATUS_MASK) != 0x1) { + dev_err(dev, "mailbox status 0x%x!\n", status); + ret = -EBUSY; + goto out; + } + +out: + return ret; +} + +static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, unsigned long timeout) +{ + int ret; + + down(&hr_dev->cmd.poll_sem); + ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); + up(&hr_dev->cmd.poll_sem); + + return ret; +} + +void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, + u64 out_param) +{ + struct hns_roce_cmd_context + *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + + if (token != context->token) + return; + + context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO); + context->out_param = out_param; + complete(&context->done); +} + +/* this should be called with "use_events" */ +static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, + unsigned long timeout) +{ + struct hns_roce_cmdq *cmd = &hr_dev->cmd; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cmd_context *context; + int ret = 0; + + spin_lock(&cmd->context_lock); + WARN_ON(cmd->free_head < 0); + context = &cmd->context[cmd->free_head]; + context->token += cmd->token_mask + 1; + cmd->free_head = context->next; + spin_unlock(&cmd->context_lock); + + init_completion(&context->done); + + ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + context->token, 1); + if (ret) + goto out; + + /* + * It is timeout when wait_for_completion_timeout return 0 + * The return value is the time limit set in advance + * how many seconds showing + */ + if (!wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout))) { + dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n"); + ret = -EBUSY; + goto out; + } + + ret = context->result; + if (ret) { + dev_err(dev, "[cmd]event mod cmd process error!err=%d\n", ret); + goto out; + } + +out: + spin_lock(&cmd->context_lock); + context->next = cmd->free_head; + cmd->free_head = context - cmd->context; + spin_unlock(&cmd->context_lock); + + return ret; +} + +static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, unsigned long timeout) +{ + int ret = 0; + + down(&hr_dev->cmd.event_sem); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, timeout); + up(&hr_dev->cmd.event_sem); + + return ret; +} + +int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, + unsigned long in_modifier, u8 op_modifier, u16 op, + unsigned long timeout) +{ + if (hr_dev->cmd.use_events) + return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + timeout); + else + return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + timeout); +} + +int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + + mutex_init(&hr_dev->cmd.hcr_mutex); + sema_init(&hr_dev->cmd.poll_sem, 1); + hr_dev->cmd.use_events = 0; + hr_dev->cmd.toggle = 1; + hr_dev->cmd.max_cmds = CMD_MAX_NUM; + hr_dev->cmd.hcr = hr_dev->reg_base + ROCEE_MB1_REG; + hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev, + HNS_ROCE_MAILBOX_SIZE, + HNS_ROCE_MAILBOX_SIZE, 0); + if (!hr_dev->cmd.pool) + return -ENOMEM; + + return 0; +} + +void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev) +{ + dma_pool_destroy(hr_dev->cmd.pool); +} + +int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; + int i; + + hr_cmd->context = kmalloc(hr_cmd->max_cmds * + sizeof(struct hns_roce_cmd_context), + GFP_KERNEL); + if (!hr_cmd->context) + return -ENOMEM; + + for (i = 0; i < hr_cmd->max_cmds; ++i) { + hr_cmd->context[i].token = i; + hr_cmd->context[i].next = i + 1; + } + + hr_cmd->context[hr_cmd->max_cmds - 1].next = -1; + hr_cmd->free_head = 0; + + sema_init(&hr_cmd->event_sem, hr_cmd->max_cmds); + spin_lock_init(&hr_cmd->context_lock); + + hr_cmd->token_mask = CMD_TOKEN_MASK; + hr_cmd->use_events = 1; + + down(&hr_cmd->poll_sem); + + return 0; +} + +void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; + int i; + + hr_cmd->use_events = 0; + + for (i = 0; i < hr_cmd->max_cmds; ++i) + down(&hr_cmd->event_sem); + + kfree(hr_cmd->context); + up(&hr_cmd->poll_sem); +} + +struct hns_roce_cmd_mailbox + *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, + &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox) +{ + if (!mailbox) + return; + + dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h new file mode 100644 index 00000000000000..e3997d312c5581 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_CMD_H +#define _HNS_ROCE_CMD_H + +#define HNS_ROCE_MAILBOX_SIZE 4096 + +enum { + /* TPT commands */ + HNS_ROCE_CMD_SW2HW_MPT = 0xd, + HNS_ROCE_CMD_HW2SW_MPT = 0xf, + + /* CQ commands */ + HNS_ROCE_CMD_SW2HW_CQ = 0x16, + HNS_ROCE_CMD_HW2SW_CQ = 0x17, + + /* QP/EE commands */ + HNS_ROCE_CMD_RST2INIT_QP = 0x19, + HNS_ROCE_CMD_INIT2RTR_QP = 0x1a, + HNS_ROCE_CMD_RTR2RTS_QP = 0x1b, + HNS_ROCE_CMD_RTS2RTS_QP = 0x1c, + HNS_ROCE_CMD_2ERR_QP = 0x1e, + HNS_ROCE_CMD_RTS2SQD_QP = 0x1f, + HNS_ROCE_CMD_SQD2SQD_QP = 0x38, + HNS_ROCE_CMD_SQD2RTS_QP = 0x20, + HNS_ROCE_CMD_2RST_QP = 0x21, + HNS_ROCE_CMD_QUERY_QP = 0x22, +}; + +enum { + HNS_ROCE_CMD_TIME_CLASS_A = 10000, + HNS_ROCE_CMD_TIME_CLASS_B = 10000, + HNS_ROCE_CMD_TIME_CLASS_C = 10000, +}; + +struct hns_roce_cmd_mailbox { + void *buf; + dma_addr_t dma; +}; + +int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, + unsigned long in_modifier, u8 op_modifier, u16 op, + unsigned long timeout); + +struct hns_roce_cmd_mailbox + *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox); + +#endif /* _HNS_ROCE_CMD_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h new file mode 100644 index 00000000000000..297016103aa712 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_COMMON_H +#define _HNS_ROCE_COMMON_H + +#ifndef assert +#define assert(cond) +#endif + +#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) +#define roce_read(dev, reg) readl((dev)->reg_base + (reg)) +#define roce_raw_write(value, addr) \ + __raw_writel((__force u32)cpu_to_le32(value), (addr)) + +#define roce_get_field(origin, mask, shift) \ + (((origin) & (mask)) >> (shift)) + +#define roce_get_bit(origin, shift) \ + roce_get_field((origin), (1ul << (shift)), (shift)) + +#define roce_set_field(origin, mask, shift, val) \ + do { \ + (origin) &= (~(mask)); \ + (origin) |= (((u32)(val) << (shift)) & (mask)); \ + } while (0) + +#define roce_set_bit(origin, shift, val) \ + roce_set_field((origin), (1ul << (shift)), (shift), (val)) + +#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 +#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 + +#define ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S 5 + +#define ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S 6 + +#define ROCEE_GLB_CFG_ROCEE_PORT_ST_S 10 +#define ROCEE_GLB_CFG_ROCEE_PORT_ST_M \ + (((1UL << 6) - 1) << ROCEE_GLB_CFG_ROCEE_PORT_ST_S) + +#define ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S 16 + +#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S 0 +#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M \ + (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S) + +#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S 24 +#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M \ + (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S) + +#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S 0 +#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M \ + (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S) + +#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S 24 +#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M \ + (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S) + +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S 0 +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M \ + (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S) + +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S 16 +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M \ + (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S) + +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S 0 +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M \ + (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S) + +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S 16 +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M \ + (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S) + +#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_S 0 +#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_M \ + (((1UL << 8) - 1) << ROCEE_RAQ_WL_ROCEE_RAQ_WL_S) + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S 0 +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M \ + (((1UL << 15) - 1) << \ + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S) + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S 16 +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M \ + (((1UL << 4) - 1) << \ + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S) + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S 20 + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE 21 + +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S 0 +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S) + +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S 5 +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S) + +#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S 0 +#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S) + +#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S 5 +#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S) + +#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S 0 +#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M \ + (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S) + +#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S 8 +#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S 0 +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M \ + (((1UL << 19) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_S 19 + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S 20 +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M \ + (((1UL << 2) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S 22 +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31 + +#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0 +#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \ + (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S) + +#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0 +#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \ + (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S) + +#define ROCEE_MB6_ROCEE_MB_CMD_S 0 +#define ROCEE_MB6_ROCEE_MB_CMD_M \ + (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S) + +#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8 +#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \ + (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S) + +#define ROCEE_MB6_ROCEE_MB_EVENT_S 14 + +#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15 + +#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16 +#define ROCEE_MB6_ROCEE_MB_TOKEN_M \ + (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0 +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \ + (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24 +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \ + (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28 +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \ + (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31 + +#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0 +#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \ + (((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S) + +#define ROCEE_SMAC_H_ROCEE_PORT_MTU_S 16 +#define ROCEE_SMAC_H_ROCEE_PORT_MTU_M \ + (((1UL << 4) - 1) << ROCEE_SMAC_H_ROCEE_PORT_MTU_S) + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S 0 +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M \ + (((1UL << 2) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S) + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S 8 +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M \ + (((1UL << 4) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S) + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S 17 + +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S 0 +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M \ + (((1UL << 5) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S) + +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S 16 +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M \ + (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S) + +#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S 0 +#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M \ + (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S) + +#define ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S 16 +#define ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S 1 +#define ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S 0 + +#define ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S 0 +#define ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S 1 + +#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0 + +#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0 +#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \ + (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) + +#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0 +#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ + (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + +#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 +#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ + (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) + +/*************ROCEE_REG DEFINITION****************/ +#define ROCEE_VENDOR_ID_REG 0x0 +#define ROCEE_VENDOR_PART_ID_REG 0x4 + +#define ROCEE_HW_VERSION_REG 0x8 + +#define ROCEE_SYS_IMAGE_GUID_L_REG 0xC +#define ROCEE_SYS_IMAGE_GUID_H_REG 0x10 + +#define ROCEE_PORT_GID_L_0_REG 0x50 +#define ROCEE_PORT_GID_ML_0_REG 0x54 +#define ROCEE_PORT_GID_MH_0_REG 0x58 +#define ROCEE_PORT_GID_H_0_REG 0x5C + +#define ROCEE_BT_CMD_H_REG 0x204 + +#define ROCEE_SMAC_L_0_REG 0x240 +#define ROCEE_SMAC_H_0_REG 0x244 + +#define ROCEE_QP1C_CFG3_0_REG 0x27C + +#define ROCEE_CAEP_AEQE_CONS_IDX_REG 0x3AC +#define ROCEE_CAEP_CEQC_CONS_IDX_0_REG 0x3BC + +#define ROCEE_ECC_UCERR_ALM1_REG 0xB38 +#define ROCEE_ECC_UCERR_ALM2_REG 0xB3C +#define ROCEE_ECC_CERR_ALM1_REG 0xB44 +#define ROCEE_ECC_CERR_ALM2_REG 0xB48 + +#define ROCEE_ACK_DELAY_REG 0x14 +#define ROCEE_GLB_CFG_REG 0x18 + +#define ROCEE_DMAE_USER_CFG1_REG 0x40 +#define ROCEE_DMAE_USER_CFG2_REG 0x44 + +#define ROCEE_DB_SQ_WL_REG 0x154 +#define ROCEE_DB_OTHERS_WL_REG 0x158 +#define ROCEE_RAQ_WL_REG 0x15C +#define ROCEE_WRMS_POL_TIME_INTERVAL_REG 0x160 +#define ROCEE_EXT_DB_SQ_REG 0x164 +#define ROCEE_EXT_DB_SQ_H_REG 0x168 +#define ROCEE_EXT_DB_OTH_REG 0x16C + +#define ROCEE_EXT_DB_OTH_H_REG 0x170 +#define ROCEE_EXT_DB_SQ_WL_EMPTY_REG 0x174 +#define ROCEE_EXT_DB_SQ_WL_REG 0x178 +#define ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG 0x17C +#define ROCEE_EXT_DB_OTHERS_WL_REG 0x180 +#define ROCEE_EXT_RAQ_REG 0x184 +#define ROCEE_EXT_RAQ_H_REG 0x188 + +#define ROCEE_CAEP_CE_INTERVAL_CFG_REG 0x190 +#define ROCEE_CAEP_CE_BURST_NUM_CFG_REG 0x194 +#define ROCEE_BT_CMD_L_REG 0x200 + +#define ROCEE_MB1_REG 0x210 +#define ROCEE_DB_SQ_L_0_REG 0x230 +#define ROCEE_DB_OTHERS_L_0_REG 0x238 +#define ROCEE_QP1C_CFG0_0_REG 0x270 + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_REG 0x3A0 +#define ROCEE_CAEP_CEQC_SHIFT_0_REG 0x3B0 +#define ROCEE_CAEP_CE_IRQ_MASK_0_REG 0x3C0 +#define ROCEE_CAEP_CEQ_ALM_OVF_0_REG 0x3C4 +#define ROCEE_CAEP_AE_MASK_REG 0x6C8 +#define ROCEE_CAEP_AE_ST_REG 0x6CC + +#define ROCEE_SDB_ISSUE_PTR_REG 0x758 +#define ROCEE_SDB_SEND_PTR_REG 0x75C +#define ROCEE_SDB_INV_CNT_REG 0x9A4 +#define ROCEE_ECC_UCERR_ALM0_REG 0xB34 +#define ROCEE_ECC_CERR_ALM0_REG 0xB40 + +#endif /* _HNS_ROCE_COMMON_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c new file mode 100644 index 00000000000000..875597b0e69c59 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" +#include "hns_roce_user.h" +#include "hns_roce_common.h" + +static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) +{ + struct ib_cq *ibcq = &hr_cq->ib_cq; + + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, + enum hns_roce_event event_type) +{ + struct hns_roce_dev *hr_dev; + struct ib_event event; + struct ib_cq *ibcq; + + ibcq = &hr_cq->ib_cq; + hr_dev = to_hr_dev(ibcq->device); + + if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && + event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && + event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { + dev_err(&hr_dev->pdev->dev, + "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", + event_type, hr_cq->cqn); + return; + } + + if (ibcq->event_handler) { + event.device = ibcq->device; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) +{ + return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, + HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A); +} + +static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, + struct hns_roce_mtt *hr_mtt, + struct hns_roce_uar *hr_uar, + struct hns_roce_cq *hr_cq, int vector, + int collapsed) +{ + struct hns_roce_cmd_mailbox *mailbox = NULL; + struct hns_roce_cq_table *cq_table = NULL; + struct device *dev = &hr_dev->pdev->dev; + dma_addr_t dma_handle; + u64 *mtts = NULL; + int ret = 0; + + cq_table = &hr_dev->cq_table; + + /* Get the physical address of cq buf */ + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + hr_mtt->first_seg, &dma_handle); + if (!mtts) { + dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); + return -EINVAL; + } + + if (vector >= hr_dev->caps.num_comp_vectors) { + dev_err(dev, "CQ alloc.Invalid vector.\n"); + return -EINVAL; + } + hr_cq->vector = vector; + + ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); + if (ret == -1) { + dev_err(dev, "CQ alloc.Failed to alloc index.\n"); + return -ENOMEM; + } + + /* Get CQC memory HEM(Hardware Entry Memory) table */ + ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); + if (ret) { + dev_err(dev, "CQ alloc.Failed to get context mem.\n"); + goto err_out; + } + + /* The cq insert radix tree */ + spin_lock_irq(&cq_table->lock); + /* Radix_tree: The associated pointer and long integer key value like */ + ret = radix_tree_insert(&cq_table->tree, hr_cq->cqn, hr_cq); + spin_unlock_irq(&cq_table->lock); + if (ret) { + dev_err(dev, "CQ alloc.Failed to radix_tree_insert.\n"); + goto err_put; + } + + /* Allocate mailbox memory */ + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto err_radix; + } + + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, + nent, vector); + + /* Send mailbox to hw */ + ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); + goto err_radix; + } + + hr_cq->cons_index = 0; + hr_cq->uar = hr_uar; + + return 0; + +err_radix: + spin_lock_irq(&cq_table->lock); + radix_tree_delete(&cq_table->tree, hr_cq->cqn); + spin_unlock_irq(&cq_table->lock); + +err_put: + hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); + +err_out: + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); + return ret; +} + +static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) +{ + return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, + mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, + HNS_ROCE_CMD_TIME_CLASS_A); +} + +static void hns_roce_free_cq(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct device *dev = &hr_dev->pdev->dev; + int ret; + + ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); + if (ret) + dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, + hr_cq->cqn); + + /* Waiting interrupt process procedure carried out */ + synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); + + spin_lock_irq(&cq_table->lock); + radix_tree_delete(&cq_table->tree, hr_cq->cqn); + spin_unlock_irq(&cq_table->lock); + + hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); +} + +static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, + struct ib_ucontext *context, + struct hns_roce_cq_buf *buf, + struct ib_umem **umem, u64 buf_addr, int cqe) +{ + int ret; + + *umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(*umem)) + return PTR_ERR(*umem); + + ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), + ilog2((unsigned int)(*umem)->page_size), + &buf->hr_mtt); + if (ret) + goto err_buf; + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem); + if (ret) + goto err_mtt; + + return 0; + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + +err_buf: + ib_umem_release(*umem); + return ret; +} + +static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_cq_buf *buf, u32 nent) +{ + int ret; + + ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, + PAGE_SIZE * 2, &buf->hr_buf); + if (ret) + goto out; + + ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, + buf->hr_buf.page_shift, &buf->hr_mtt); + if (ret) + goto err_buf; + + ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + if (ret) + goto err_mtt; + + return 0; + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + +err_buf: + hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz, + &buf->hr_buf); +out: + return ret; +} + +static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_cq_buf *buf, int cqe) +{ + hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, + &buf->hr_buf); +} + +struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_ib_create_cq ucmd; + struct hns_roce_cq *hr_cq = NULL; + struct hns_roce_uar *uar = NULL; + int vector = attr->comp_vector; + int cq_entries = attr->cqe; + int ret = 0; + + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { + dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", + cq_entries, hr_dev->caps.max_cqes); + return ERR_PTR(-EINVAL); + } + + hr_cq = kmalloc(sizeof(*hr_cq), GFP_KERNEL); + if (!hr_cq) + return ERR_PTR(-ENOMEM); + + /* In v1 engine, parameter verification */ + if (cq_entries < HNS_ROCE_MIN_CQE_NUM) + cq_entries = HNS_ROCE_MIN_CQE_NUM; + + cq_entries = roundup_pow_of_two((unsigned int)cq_entries); + hr_cq->ib_cq.cqe = cq_entries - 1; + mutex_init(&hr_cq->resize_mutex); + spin_lock_init(&hr_cq->lock); + hr_cq->hr_resize_buf = NULL; + hr_cq->resize_umem = NULL; + + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + dev_err(dev, "Failed to copy_from_udata.\n"); + ret = -EFAULT; + goto err_cq; + } + + /* Get user space address, write it into mtt table */ + ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf, + &hr_cq->umem, ucmd.buf_addr, + cq_entries); + if (ret) { + dev_err(dev, "Failed to get_cq_umem.\n"); + goto err_cq; + } + + /* Get user space parameters */ + uar = &to_hr_ucontext(context)->uar; + } else { + /* Init mmt table and write buff address to mtt table */ + ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, + cq_entries); + if (ret) { + dev_err(dev, "Failed to alloc_cq_buf.\n"); + goto err_cq; + } + + uar = &hr_dev->priv_uar; + hr_cq->cq_db_l = hr_dev->reg_base + ROCEE_DB_OTHERS_L_0_REG + + 0x1000 * uar->index; + } + + /* Allocate cq index, fill cq_context */ + ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, + uar, hr_cq, vector, 0); + if (ret) { + dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); + goto err_mtt; + } + + /* Get created cq handler and carry out event */ + hr_cq->comp = hns_roce_ib_cq_comp; + hr_cq->event = hns_roce_ib_cq_event; + hr_cq->cq_depth = cq_entries; + + if (context) { + if (ib_copy_to_udata(udata, &hr_cq->cqn, sizeof(u64))) { + ret = -EFAULT; + goto err_mtt; + } + } + + return &hr_cq->ib_cq; + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + if (context) + ib_umem_release(hr_cq->umem); + else + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, + hr_cq->ib_cq.cqe); + +err_cq: + kfree(hr_cq); + return ERR_PTR(ret); +} + +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + + hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + + if (ib_cq->uobject) + ib_umem_release(hr_cq->umem); + else + /* Free the buff of stored cq */ + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + + kfree(hr_cq); + + return 0; +} + +void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cq *cq; + + cq = radix_tree_lookup(&hr_dev->cq_table.tree, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!cq) { + dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); + return; + } + + cq->comp(cq); +} + +void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cq *cq; + + cq = radix_tree_lookup(&cq_table->tree, + cqn & (hr_dev->caps.num_cqs - 1)); + if (cq) + atomic_inc(&cq->refcount); + + if (!cq) { + dev_warn(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + + cq->event(cq, (enum hns_roce_event)event_type); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + +int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + + return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, + hr_dev->caps.num_cqs - 1, + hr_dev->caps.reserved_cqs, 0); +} + +void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h new file mode 100644 index 00000000000000..00f01be1ffa548 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_DEVICE_H +#define _HNS_ROCE_DEVICE_H + +#include + +#define DRV_NAME "hns_roce" + +#define MAC_ADDR_OCTET_NUM 6 +#define HNS_ROCE_MAX_MSG_LEN 0x80000000 + +#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) + +#define HNS_ROCE_IB_MIN_SQ_STRIDE 6 + +#define HNS_ROCE_BA_SIZE (32 * 4096) + +/* Hardware specification only for v1 engine */ +#define HNS_ROCE_MIN_CQE_NUM 0x40 +#define HNS_ROCE_MIN_WQE_NUM 0x20 + +/* Hardware specification only for v1 engine */ +#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 +#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 + +#define HNS_ROCE_MAX_IRQ_NUM 34 + +#define HNS_ROCE_COMP_VEC_NUM 32 + +#define HNS_ROCE_AEQE_VEC_NUM 1 +#define HNS_ROCE_AEQE_OF_VEC_NUM 1 + +/* 4G/4K = 1M */ +#define HNS_ROCE_SL_SHIFT 29 +#define HNS_ROCE_TCLASS_SHIFT 20 +#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff + +#define HNS_ROCE_MAX_PORTS 6 +#define HNS_ROCE_MAX_GID_NUM 16 +#define HNS_ROCE_GID_SIZE 16 + +#define MR_TYPE_MR 0x00 +#define MR_TYPE_DMA 0x03 + +#define PKEY_ID 0xffff +#define NODE_DESC_SIZE 64 + +#define SERV_TYPE_RC 0 +#define SERV_TYPE_RD 1 +#define SERV_TYPE_UC 2 +#define SERV_TYPE_UD 3 + +#define PAGES_SHIFT_8 8 +#define PAGES_SHIFT_16 16 +#define PAGES_SHIFT_24 24 +#define PAGES_SHIFT_32 32 + +enum hns_roce_qp_state { + HNS_ROCE_QP_STATE_RST, + HNS_ROCE_QP_STATE_INIT, + HNS_ROCE_QP_STATE_RTR, + HNS_ROCE_QP_STATE_RTS, + HNS_ROCE_QP_STATE_SQD, + HNS_ROCE_QP_STATE_ERR, + HNS_ROCE_QP_NUM_STATE, +}; + +enum hns_roce_event { + HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01, + HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02, + HNS_ROCE_EVENT_TYPE_COMM_EST = 0x03, + HNS_ROCE_EVENT_TYPE_SQ_DRAINED = 0x04, + HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR = 0x06, + HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR = 0x07, + HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH = 0x08, + HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH = 0x09, + HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR = 0x0a, + HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR = 0x0b, + HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW = 0x0c, + HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID = 0x0d, + HNS_ROCE_EVENT_TYPE_PORT_CHANGE = 0x0f, + /* 0x10 and 0x11 is unused in currently application case */ + HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, + HNS_ROCE_EVENT_TYPE_MB = 0x13, + HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, +}; + +/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ +enum { + HNS_ROCE_LWQCE_QPC_ERROR = 1, + HNS_ROCE_LWQCE_MTU_ERROR = 2, + HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3, + HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4, + HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5, + HNS_ROCE_LWQCE_SL_ERROR = 6, + HNS_ROCE_LWQCE_PORT_ERROR = 7, +}; + +/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ +enum { + HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, + HNS_ROCE_LAVWQE_LENGTH_ERROR = 2, + HNS_ROCE_LAVWQE_VA_ERROR = 3, + HNS_ROCE_LAVWQE_PD_ERROR = 4, + HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5, + HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6, + HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7, +}; + +/* DOORBELL overflow subtype */ +enum { + HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2, + HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6, +}; + +enum { + /* RQ&SRQ related operations */ + HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, + HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, +}; + +#define HNS_ROCE_CMD_SUCCESS 1 + +#define HNS_ROCE_PORT_DOWN 0 +#define HNS_ROCE_PORT_UP 1 + +#define HNS_ROCE_MTT_ENTRY_PER_SEG 8 + +#define PAGE_ADDR_SHIFT 12 + +struct hns_roce_uar { + u64 pfn; + unsigned long index; +}; + +struct hns_roce_ucontext { + struct ib_ucontext ibucontext; + struct hns_roce_uar uar; +}; + +struct hns_roce_pd { + struct ib_pd ibpd; + unsigned long pdn; +}; + +struct hns_roce_bitmap { + /* Bitmap Traversal last a bit which is 1 */ + unsigned long last; + unsigned long top; + unsigned long max; + unsigned long reserved_top; + unsigned long mask; + spinlock_t lock; + unsigned long *table; +}; + +/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */ +/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */ +/* Every bit repesent to a partner free/used status in bitmap */ +/* +* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 +* Bit = 1 represent to idle and available; bit = 0: not available +*/ +struct hns_roce_buddy { + /* Members point to every order level bitmap */ + unsigned long **bits; + /* Represent to avail bits of the order level bitmap */ + u32 *num_free; + int max_order; + spinlock_t lock; +}; + +/* For Hardware Entry Memory */ +struct hns_roce_hem_table { + /* HEM type: 0 = qpc, 1 = mtt, 2 = cqc, 3 = srq, 4 = other */ + u32 type; + /* HEM array elment num */ + unsigned long num_hem; + /* HEM entry record obj total num */ + unsigned long num_obj; + /*Single obj size */ + unsigned long obj_size; + int lowmem; + struct mutex mutex; + struct hns_roce_hem **hem; +}; + +struct hns_roce_mtt { + unsigned long first_seg; + int order; + int page_shift; +}; + +/* Only support 4K page size for mr register */ +#define MR_SIZE_4K 0 + +struct hns_roce_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + u64 iova; /* MR's virtual orignal addr */ + u64 size; /* Address range of MR */ + u32 key; /* Key of MR */ + u32 pd; /* PD num of MR */ + u32 access;/* Access permission of MR */ + int enabled; /* MR's active status */ + int type; /* MR's register type */ + u64 *pbl_buf;/* MR's PBL space */ + dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ +}; + +struct hns_roce_mr_table { + struct hns_roce_bitmap mtpt_bitmap; + struct hns_roce_buddy mtt_buddy; + struct hns_roce_hem_table mtt_table; + struct hns_roce_hem_table mtpt_table; +}; + +struct hns_roce_wq { + u64 *wrid; /* Work request ID */ + spinlock_t lock; + int wqe_cnt; /* WQE num */ + u32 max_post; + int max_gs; + int offset; + int wqe_shift;/* WQE size */ + u32 head; + u32 tail; + void __iomem *db_reg_l; +}; + +struct hns_roce_buf_list { + void *buf; + dma_addr_t map; +}; + +struct hns_roce_buf { + struct hns_roce_buf_list direct; + struct hns_roce_buf_list *page_list; + int nbufs; + u32 npages; + int page_shift; +}; + +struct hns_roce_cq_buf { + struct hns_roce_buf hr_buf; + struct hns_roce_mtt hr_mtt; +}; + +struct hns_roce_cq_resize { + struct hns_roce_cq_buf hr_buf; + int cqe; +}; + +struct hns_roce_cq { + struct ib_cq ib_cq; + struct hns_roce_cq_buf hr_buf; + /* pointer to store information after resize*/ + struct hns_roce_cq_resize *hr_resize_buf; + spinlock_t lock; + struct mutex resize_mutex; + struct ib_umem *umem; + struct ib_umem *resize_umem; + void (*comp)(struct hns_roce_cq *); + void (*event)(struct hns_roce_cq *, enum hns_roce_event); + + struct hns_roce_uar *uar; + u32 cq_depth; + u32 cons_index; + void __iomem *cq_db_l; + void __iomem *tptr_addr; + unsigned long cqn; + u32 vector; + atomic_t refcount; + struct completion free; +}; + +struct hns_roce_srq { + struct ib_srq ibsrq; + int srqn; +}; + +struct hns_roce_uar_table { + struct hns_roce_bitmap bitmap; +}; + +struct hns_roce_qp_table { + struct hns_roce_bitmap bitmap; + spinlock_t lock; + struct hns_roce_hem_table qp_table; + struct hns_roce_hem_table irrl_table; +}; + +struct hns_roce_cq_table { + struct hns_roce_bitmap bitmap; + spinlock_t lock; + struct radix_tree_root tree; + struct hns_roce_hem_table table; +}; + +struct hns_roce_raq_table { + struct hns_roce_buf_list *e_raq_buf; +}; + +struct hns_roce_av { + __le32 port_pd; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __le32 sl_tclass_flowlabel; + u8 dgid[HNS_ROCE_GID_SIZE]; + u8 mac[6]; + __le16 vlan; +}; + +struct hns_roce_ah { + struct ib_ah ibah; + struct hns_roce_av av; +}; + +struct hns_roce_cmd_context { + struct completion done; + int result; + int next; + u64 out_param; + u16 token; +}; + +struct hns_roce_cmdq { + struct dma_pool *pool; + u8 __iomem *hcr; + struct mutex hcr_mutex; + struct semaphore poll_sem; + /* + * Event mode: cmd register mutex protection, + * ensure to not exceed max_cmds and user use limit region + */ + struct semaphore event_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct hns_roce_cmd_context *context; + /* + * Result of get integer part + * which max_comds compute according a power of 2 + */ + u16 token_mask; + /* + * Process whether use event mode, init default non-zero + * After the event queue of cmd event ready, + * can switch into event mode + * close device, switch into poll mode(non event mode) + */ + u8 use_events; + u8 toggle; +}; + +struct hns_roce_dev; + +struct hns_roce_qp { + struct ib_qp ibqp; + struct hns_roce_buf hr_buf; + struct hns_roce_wq rq; + __le64 doorbell_qpn; + __le32 sq_signal_bits; + u32 sq_next_wqe; + int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct hns_roce_wq sq; + + struct ib_umem *umem; + struct hns_roce_mtt mtt; + u32 buff_size; + struct mutex mutex; + u8 port; + u8 sl; + u8 resp_depth; + u8 state; + u32 access_flags; + u32 pkey_index; + void (*event)(struct hns_roce_qp *, + enum hns_roce_event); + unsigned long qpn; + + atomic_t refcount; + struct completion free; +}; + +struct hns_roce_sqp { + struct hns_roce_qp hr_qp; +}; + +struct hns_roce_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; + struct notifier_block nb; + struct notifier_block nb_inet; + /* 16 GID is shared by 6 port in v1 engine. */ + union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM]; + u8 phy_port[HNS_ROCE_MAX_PORTS]; +}; + +struct hns_roce_eq { + struct hns_roce_dev *hr_dev; + void __iomem *doorbell; + + int type_flag;/* Aeq:1 ceq:0 */ + int eqn; + u32 entries; + int log_entries; + int eqe_size; + int irq; + int log_page_size; + int cons_index; + struct hns_roce_buf_list *buf_list; +}; + +struct hns_roce_eq_table { + struct hns_roce_eq *eq; + void __iomem **eqc_base; +}; + +struct hns_roce_caps { + u8 num_ports; + int gid_table_len[HNS_ROCE_MAX_PORTS]; + int pkey_table_len[HNS_ROCE_MAX_PORTS]; + int local_ca_ack_delay; + int num_uars; + u32 phy_num_uars; + u32 max_sq_sg; /* 2 */ + u32 max_sq_inline; /* 32 */ + u32 max_rq_sg; /* 2 */ + int num_qps; /* 256k */ + u32 max_wqes; /* 16k */ + u32 max_sq_desc_sz; /* 64 */ + u32 max_rq_desc_sz; /* 64 */ + int max_qp_init_rdma; + int max_qp_dest_rdma; + int sqp_start; + int num_cqs; + int max_cqes; + int reserved_cqs; + int num_aeq_vectors; /* 1 */ + int num_comp_vectors; /* 32 ceq */ + int num_other_vectors; + int num_mtpts; + u32 num_mtt_segs; + int reserved_mrws; + int reserved_uars; + int num_pds; + int reserved_pds; + u32 mtt_entry_sz; + u32 cq_entry_sz; + u32 page_size_cap; + u32 reserved_lkey; + int mtpt_entry_sz; + int qpc_entry_sz; + int irrl_entry_sz; + int cqc_entry_sz; + int aeqe_depth; + int ceqe_depth[HNS_ROCE_COMP_VEC_NUM]; + enum ib_mtu max_mtu; +}; + +struct hns_roce_hw { + int (*reset)(struct hns_roce_dev *hr_dev, bool enable); + void (*hw_profile)(struct hns_roce_dev *hr_dev); + int (*hw_init)(struct hns_roce_dev *hr_dev); + void (*hw_exit)(struct hns_roce_dev *hr_dev); + void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid); + void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); + void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, + enum ib_mtu mtu); + int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr, + unsigned long mtpt_idx); + void (*write_cqc)(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, + dma_addr_t dma_handle, int nent, u32 vector); + int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state); + int (*destroy_qp)(struct ib_qp *ibqp); + int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); + int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); + int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); + void *priv; +}; + +struct hns_roce_dev { + struct ib_device ib_dev; + struct platform_device *pdev; + struct hns_roce_uar priv_uar; + const char *irq_names; + spinlock_t sm_lock; + spinlock_t cq_db_lock; + spinlock_t bt_cmd_lock; + struct hns_roce_ib_iboe iboe; + + int irq[HNS_ROCE_MAX_IRQ_NUM]; + u8 __iomem *reg_base; + struct hns_roce_caps caps; + struct radix_tree_root qp_table_tree; + + unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM]; + u64 sys_image_guid; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_rev; + void __iomem *priv_addr; + + struct hns_roce_cmdq cmd; + struct hns_roce_bitmap pd_bitmap; + struct hns_roce_uar_table uar_table; + struct hns_roce_mr_table mr_table; + struct hns_roce_cq_table cq_table; + struct hns_roce_qp_table qp_table; + struct hns_roce_eq_table eq_table; + + int cmd_mod; + int loop_idc; + struct hns_roce_hw *hw; +}; + +static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) +{ + return container_of(ib_dev, struct hns_roce_dev, ib_dev); +} + +static inline struct hns_roce_ucontext + *to_hr_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct hns_roce_ucontext, ibucontext); +} + +static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct hns_roce_pd, ibpd); +} + +static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct hns_roce_ah, ibah); +} + +static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct hns_roce_mr, ibmr); +} + +static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct hns_roce_qp, ibqp); +} + +static inline struct hns_roce_cq *to_hr_cq(struct ib_cq *ib_cq) +{ + return container_of(ib_cq, struct hns_roce_cq, ib_cq); +} + +static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct hns_roce_srq, ibsrq); +} + +static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) +{ + return container_of(hr_qp, struct hns_roce_sqp, hr_qp); +} + +static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) +{ + __raw_writeq(*(u64 *) val, dest); +} + +static inline struct hns_roce_qp + *__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn) +{ + return radix_tree_lookup(&hr_dev->qp_table_tree, + qpn & (hr_dev->caps.num_qps - 1)); +} + +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +{ + u32 bits_per_long_val = BITS_PER_LONG; + + if (bits_per_long_val == 64 || buf->nbufs == 1) + return (char *)(buf->direct.buf) + offset; + else + return (char *)(buf->page_list[offset >> PAGE_SHIFT].buf) + + (offset & (PAGE_SIZE - 1)); +} + +int hns_roce_init_uar_table(struct hns_roce_dev *dev); +int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); +void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); +void hns_roce_cleanup_uar_table(struct hns_roce_dev *dev); + +int hns_roce_cmd_init(struct hns_roce_dev *hr_dev); +void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev); +void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, + u64 out_param); +int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev); +void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); + +int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, + struct hns_roce_mtt *mtt); +void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt); +int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct hns_roce_buf *buf); + +int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); + +void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); + +int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj); +int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 resetrved_top); +void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap); +void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev); +int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, + int align, unsigned long *obj); +void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, + unsigned long obj, int cnt); + +struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int hns_roce_destroy_ah(struct ib_ah *ah); + +struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, + struct ib_ucontext *context, + struct ib_udata *udata); +int hns_roce_dealloc_pd(struct ib_pd *pd); + +struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int hns_roce_dereg_mr(struct ib_mr *ibmr); + +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, + struct hns_roce_buf *buf); +int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, + struct hns_roce_buf *buf); + +int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct ib_umem *umem); + +struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n); +void *get_send_wqe(struct hns_roce_qp *hr_qp, int n); +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, + struct ib_cq *ib_cq); +enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); +void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, + struct hns_roce_cq *recv_cq); +void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, + struct hns_roce_cq *recv_cq); +void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); +void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); +void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, + int cnt); +__be32 send_ieth(struct ib_send_wr *wr); +int to_hr_qp_type(int qp_type); + +struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); + +void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); +void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); +void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); + +extern struct hns_roce_hw hns_roce_hw_v1; + +#endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c new file mode 100644 index 00000000000000..5febbb4b68e798 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -0,0 +1,762 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_eq.h" + +static void eq_set_cons_index(struct hns_roce_eq *eq, int req_not) +{ + roce_raw_write((eq->cons_index & CONS_INDEX_MASK) | + (req_not << eq->log_entries), eq->doorbell); + /* Memory barrier */ + mb(); +} + +static struct hns_roce_aeqe *get_aeqe(struct hns_roce_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->entries - 1)) * + HNS_ROCE_AEQ_ENTRY_SIZE; + + return (struct hns_roce_aeqe *)((u8 *) + (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + + off % HNS_ROCE_BA_SIZE); +} + +static struct hns_roce_aeqe *next_aeqe_sw(struct hns_roce_eq *eq) +{ + struct hns_roce_aeqe *aeqe = get_aeqe(eq, eq->cons_index); + + return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^ + !!(eq->cons_index & eq->entries)) ? aeqe : NULL; +} + +static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe, int qpn) +{ + struct device *dev = &hr_dev->pdev->dev; + + qpn = roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); + dev_warn(dev, "Local Work Queue Catastrophic Error.\n"); + switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { + case HNS_ROCE_LWQCE_QPC_ERROR: + dev_warn(dev, "QP %d, QPC error.\n", qpn); + break; + case HNS_ROCE_LWQCE_MTU_ERROR: + dev_warn(dev, "QP %d, MTU error.\n", qpn); + break; + case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: + dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn); + break; + case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: + dev_warn(dev, "QP %d, WQE addr error.\n", qpn); + break; + case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: + dev_warn(dev, "QP %d, WQE shift error\n", qpn); + break; + case HNS_ROCE_LWQCE_SL_ERROR: + dev_warn(dev, "QP %d, SL error.\n", qpn); + break; + case HNS_ROCE_LWQCE_PORT_ERROR: + dev_warn(dev, "QP %d, port error.\n", qpn); + break; + default: + break; + } + + hns_roce_qp_event(hr_dev, roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S), + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); +} + +static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe, + int qpn) +{ + struct device *dev = &hr_dev->pdev->dev; + + qpn = roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); + dev_warn(dev, "Local Access Violation Work Queue Error.\n"); + switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { + case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: + dev_warn(dev, "QP %d, R_key violation.\n", qpn); + break; + case HNS_ROCE_LAVWQE_LENGTH_ERROR: + dev_warn(dev, "QP %d, length error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_VA_ERROR: + dev_warn(dev, "QP %d, VA error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_PD_ERROR: + dev_err(dev, "QP %d, PD error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_RW_ACC_ERROR: + dev_warn(dev, "QP %d, rw acc error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: + dev_warn(dev, "QP %d, key state error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: + dev_warn(dev, "QP %d, MR operation error.\n", qpn); + break; + default: + break; + } + + hns_roce_qp_event(hr_dev, roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S), + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); +} + +static void hns_roce_db_overflow_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe) +{ + struct device *dev = &hr_dev->pdev->dev; + + switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { + case HNS_ROCE_DB_SUBTYPE_SDB_OVF: + dev_warn(dev, "SDB overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF: + dev_warn(dev, "SDB almost overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP: + dev_warn(dev, "SDB almost empty.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_ODB_OVF: + dev_warn(dev, "ODB overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF: + dev_warn(dev, "ODB almost overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP: + dev_warn(dev, "SDB almost empty.\n"); + break; + default: + break; + } +} + +static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_aeqe *aeqe; + int aeqes_found = 0; + int qpn = 0; + + while ((aeqe = next_aeqe_sw(eq))) { + dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe, + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); + /* Memory barrier */ + rmb(); + + switch (roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + dev_warn(dev, "PATH MIG not supported\n"); + break; + case HNS_ROCE_EVENT_TYPE_COMM_EST: + dev_warn(dev, "COMMUNICATION established\n"); + break; + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + dev_warn(dev, "SQ DRAINED not supported\n"); + break; + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + dev_warn(dev, "PATH MIG failed\n"); + break; + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + dev_warn(dev, "qpn = 0x%lx\n", + roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S)); + hns_roce_qp_event(hr_dev, + roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S), + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); + break; + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn); + break; + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn); + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + dev_warn(dev, "SRQ not support!\n"); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + dev_warn(dev, "CQ 0x%lx access err.\n", + roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + hns_roce_cq_event(hr_dev, + le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)), + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); + break; + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + dev_warn(dev, "CQ 0x%lx overflow\n", + roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + hns_roce_cq_event(hr_dev, + le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)), + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID: + dev_warn(dev, "CQ ID invalid.\n"); + hns_roce_cq_event(hr_dev, + le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)), + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); + break; + case HNS_ROCE_EVENT_TYPE_PORT_CHANGE: + dev_warn(dev, "port change.\n"); + break; + case HNS_ROCE_EVENT_TYPE_MB: + hns_roce_cmd_event(hr_dev, + le16_to_cpu(aeqe->event.cmd.token), + aeqe->event.cmd.status, + le64_to_cpu(aeqe->event.cmd.out_param + )); + break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + hns_roce_db_overflow_handle(hr_dev, aeqe); + break; + case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: + dev_warn(dev, "CEQ 0x%lx overflow.\n", + roce_get_field(aeqe->event.ce_event.ceqe, + HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, + HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); + break; + default: + dev_warn(dev, "Unhandled event 0x%lx on EQ %d at index %u\n", + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S), + eq->eqn, eq->cons_index); + break; + }; + + eq->cons_index++; + aeqes_found = 1; + + if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) { + dev_warn(dev, "cons_index overflow, set back to zero\n" + ); + eq->cons_index = 0; + } + } + + eq_set_cons_index(eq, 0); + + return aeqes_found; +} + +static struct hns_roce_ceqe *get_ceqe(struct hns_roce_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->entries - 1)) * + HNS_ROCE_CEQ_ENTRY_SIZE; + + return (struct hns_roce_ceqe *)((u8 *) + (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + + off % HNS_ROCE_BA_SIZE); +} + +static struct hns_roce_ceqe *next_ceqe_sw(struct hns_roce_eq *eq) +{ + struct hns_roce_ceqe *ceqe = get_ceqe(eq, eq->cons_index); + + return (!!(roce_get_bit(ceqe->ceqe.comp, + HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^ + (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; +} + +static int hns_roce_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + struct hns_roce_ceqe *ceqe; + int ceqes_found = 0; + u32 cqn; + + while ((ceqe = next_ceqe_sw(eq))) { + /* Memory barrier */ + rmb(); + cqn = roce_get_field(ceqe->ceqe.comp, + HNS_ROCE_CEQE_CEQE_COMP_CQN_M, + HNS_ROCE_CEQE_CEQE_COMP_CQN_S); + hns_roce_cq_completion(hr_dev, cqn); + + ++eq->cons_index; + ceqes_found = 1; + + if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth[eq->eqn] - 1) { + dev_warn(&eq->hr_dev->pdev->dev, + "cons_index overflow, set back to zero\n"); + eq->cons_index = 0; + } + } + + eq_set_cons_index(eq, 0); + + return ceqes_found; +} + +static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) +{ + struct device *dev = &eq->hr_dev->pdev->dev; + int eqovf_found = 0; + u32 caepaemask_val; + u32 cealmovf_val; + u32 caepaest_val; + u32 aeshift_val; + u32 ceshift_val; + u32 cemask_val; + int i = 0; + + /** + * AEQ overflow ECC mult bit err CEQ overflow alarm + * must clear interrupt, mask irq, clear irq, cancel mask operation + */ + aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); + + if (roce_get_bit(aeshift_val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { + dev_warn(dev, "AEQ overflow!\n"); + + /* Set mask */ + caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); + roce_set_bit(caepaemask_val, + ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_ENABLE); + roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); + + /* Clear int state(INT_WC : write 1 clear) */ + caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); + roce_set_bit(caepaest_val, + ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); + + /* Clear mask */ + caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); + roce_set_bit(caepaemask_val, + ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_DISABLE); + roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); + } + + /* CEQ almost overflow */ + for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { + ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + + i * CEQ_REG_OFFSET); + + if (roce_get_bit(ceshift_val, + ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { + dev_warn(dev, "CEQ[%d] almost overflow!\n", i); + eqovf_found++; + + /* Set mask */ + cemask_val = roce_read(hr_dev, + ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET); + roce_set_bit(cemask_val, + ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_ENABLE); + roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET, cemask_val); + + /* Clear int state(INT_WC : write 1 clear) */ + cealmovf_val = roce_read(hr_dev, + ROCEE_CAEP_CEQ_ALM_OVF_0_REG + + i * CEQ_REG_OFFSET); + roce_set_bit(cealmovf_val, + ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, + 1); + roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + + i * CEQ_REG_OFFSET, cealmovf_val); + + /* Clear mask */ + cemask_val = roce_read(hr_dev, + ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET); + roce_set_bit(cemask_val, + ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_DISABLE); + roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET, cemask_val); + } + } + + /* ECC multi-bit error alarm */ + dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n", + roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG), + roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG), + roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG)); + + dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n", + roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG), + roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG), + roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG)); + + return eqovf_found; +} + +static int hns_roce_eq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + int eqes_found = 0; + + if (likely(eq->type_flag == HNS_ROCE_CEQ)) + /* CEQ irq routine, CEQ is pulse irq, not clear */ + eqes_found = hns_roce_ceq_int(hr_dev, eq); + else if (likely(eq->type_flag == HNS_ROCE_AEQ)) + /* AEQ irq routine, AEQ is pulse irq, not clear */ + eqes_found = hns_roce_aeq_int(hr_dev, eq); + else + /* AEQ queue overflow irq */ + eqes_found = hns_roce_aeq_ovf_int(hr_dev, eq); + + return eqes_found; +} + +static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr) +{ + int int_work = 0; + struct hns_roce_eq *eq = eq_ptr; + struct hns_roce_dev *hr_dev = eq->hr_dev; + + int_work = hns_roce_eq_int(hr_dev, eq); + + return IRQ_RETVAL(int_work); +} + +static void hns_roce_enable_eq(struct hns_roce_dev *hr_dev, int eq_num, + int enable_flag) +{ + void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; + u32 val; + + val = readl(eqc); + + if (enable_flag) + roce_set_field(val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, + HNS_ROCE_EQ_STAT_VALID); + else + roce_set_field(val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, + HNS_ROCE_EQ_STAT_INVALID); + writel(val, eqc); +} + +static int hns_roce_create_eq(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) +{ + void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; + struct device *dev = &hr_dev->pdev->dev; + dma_addr_t tmp_dma_addr; + u32 eqconsindx_val = 0; + u32 eqcuridx_val = 0; + u32 eqshift_val = 0; + int num_bas = 0; + int ret; + int i; + + num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) + + HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE; + + if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) { + dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n", + (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE, + num_bas); + return -EINVAL; + } + + eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL); + if (!eq->buf_list) + return -ENOMEM; + + for (i = 0; i < num_bas; ++i) { + eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE, + &tmp_dma_addr, + GFP_KERNEL); + if (!eq->buf_list[i].buf) { + ret = -ENOMEM; + goto err_out_free_pages; + } + + eq->buf_list[i].map = tmp_dma_addr; + memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); + } + eq->cons_index = 0; + roce_set_field(eqshift_val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, + HNS_ROCE_EQ_STAT_INVALID); + roce_set_field(eqshift_val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, + eq->log_entries); + writel(eqshift_val, eqc); + + /* Configure eq extended address 12~44bit */ + writel((u32)(eq->buf_list[0].map >> 12), (u8 *)eqc + 4); + + /* + * Configure eq extended address 45~49 bit. + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, + ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, + eq->buf_list[0].map >> 44); + roce_set_field(eqcuridx_val, + ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, + ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); + writel(eqcuridx_val, (u8 *)eqc + 8); + + /* Configure eq consumer index */ + roce_set_field(eqconsindx_val, + ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, + ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); + writel(eqconsindx_val, (u8 *)eqc + 0xc); + + return 0; + +err_out_free_pages: + for (i = i - 1; i >= 0; i--) + dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf, + eq->buf_list[i].map); + + kfree(eq->buf_list); + return ret; +} + +static void hns_roce_free_eq(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) +{ + int i = 0; + int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) + + HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE; + + if (!eq->buf_list) + return; + + for (i = 0; i < npages; ++i) + dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE, + eq->buf_list[i].buf, eq->buf_list[i].map); + + kfree(eq->buf_list); +} + +static void hns_roce_int_mask_en(struct hns_roce_dev *hr_dev) +{ + int i = 0; + u32 aemask_val; + int masken = 0; + + /* AEQ INT */ + aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); + roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + masken); + roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); + + /* CEQ INT */ + for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { + /* IRQ mask */ + roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET, masken); + } +} + +static void hns_roce_ce_int_default_cfg(struct hns_roce_dev *hr_dev) +{ + /* Configure ce int interval */ + roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG, + HNS_ROCE_CEQ_DEFAULT_INTERVAL); + + /* Configure ce int burst num */ + roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG, + HNS_ROCE_CEQ_DEFAULT_BURST_NUM); +} + +int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_eq *eq = NULL; + int eq_num = 0; + int ret = 0; + int i = 0; + int j = 0; + + eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; + eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL); + if (!eq_table->eq) + return -ENOMEM; + + eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base), + GFP_KERNEL); + if (!eq_table->eqc_base) { + ret = -ENOMEM; + goto err_eqc_base_alloc_fail; + } + + for (i = 0; i < eq_num; i++) { + eq = &eq_table->eq[i]; + eq->hr_dev = hr_dev; + eq->eqn = i; + eq->irq = hr_dev->irq[i]; + eq->log_page_size = PAGE_SHIFT; + + if (i < hr_dev->caps.num_comp_vectors) { + /* CEQ */ + eq_table->eqc_base[i] = hr_dev->reg_base + + ROCEE_CAEP_CEQC_SHIFT_0_REG + + HNS_ROCE_CEQC_REG_OFFSET * i; + eq->type_flag = HNS_ROCE_CEQ; + eq->doorbell = hr_dev->reg_base + + ROCEE_CAEP_CEQC_CONS_IDX_0_REG + + HNS_ROCE_CEQC_REG_OFFSET * i; + eq->entries = hr_dev->caps.ceqe_depth[i]; + eq->log_entries = ilog2(eq->entries); + eq->eqe_size = sizeof(struct hns_roce_ceqe); + } else { + /* AEQ */ + eq_table->eqc_base[i] = hr_dev->reg_base + + ROCEE_CAEP_AEQC_AEQE_SHIFT_REG; + eq->type_flag = HNS_ROCE_AEQ; + eq->doorbell = hr_dev->reg_base + + ROCEE_CAEP_AEQE_CONS_IDX_REG; + eq->entries = hr_dev->caps.aeqe_depth; + eq->log_entries = ilog2(eq->entries); + eq->eqe_size = sizeof(struct hns_roce_aeqe); + } + } + + /* Disable irq */ + hns_roce_int_mask_en(hr_dev); + + /* Configure CE irq interval and burst num */ + hns_roce_ce_int_default_cfg(hr_dev); + + for (i = 0; i < eq_num; i++) { + ret = hns_roce_create_eq(hr_dev, &eq_table->eq[i]); + if (ret) { + dev_err(dev, "eq create failed\n"); + goto err_create_eq_fail; + } + } + + for (j = 0; j < eq_num; j++) { + ret = request_irq(eq_table->eq[j].irq, hns_roce_msi_x_interrupt, + 0, hr_dev->irq_names, eq_table->eq + j); + if (ret) { + dev_err(dev, "request irq error!\n"); + goto err_request_irq_fail; + } + } + + for (i = 0; i < eq_num; i++) + hns_roce_enable_eq(hr_dev, i, EQ_ENABLE); + + return 0; + +err_request_irq_fail: + for (j = j - 1; j >= 0; j--) + free_irq(eq_table->eq[j].irq, eq_table->eq + j); + +err_create_eq_fail: + for (i = i - 1; i >= 0; i--) + hns_roce_free_eq(hr_dev, &eq_table->eq[i]); + + kfree(eq_table->eqc_base); + +err_eqc_base_alloc_fail: + kfree(eq_table->eq); + + return ret; +} + +void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev) +{ + int i; + int eq_num; + struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; + + eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; + for (i = 0; i < eq_num; i++) { + /* Disable EQ */ + hns_roce_enable_eq(hr_dev, i, EQ_DISABLE); + + free_irq(eq_table->eq[i].irq, eq_table->eq + i); + + hns_roce_free_eq(hr_dev, &eq_table->eq[i]); + } + + kfree(eq_table->eqc_base); + kfree(eq_table->eq); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.h b/drivers/infiniband/hw/hns/hns_roce_eq.h new file mode 100644 index 00000000000000..fe4388191a3c25 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_eq.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_EQ_H +#define _HNS_ROCE_EQ_H + +#define HNS_ROCE_CEQ 1 +#define HNS_ROCE_AEQ 2 + +#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4 +#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10 +#define HNS_ROCE_CEQC_REG_OFFSET 0x18 + +#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10 +#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10 + +#define HNS_ROCE_INT_MASK_DISABLE 0 +#define HNS_ROCE_INT_MASK_ENABLE 1 + +#define EQ_ENABLE 1 +#define EQ_DISABLE 0 +#define CONS_INDEX_MASK 0xffff + +#define CEQ_REG_OFFSET 0x18 + +enum { + HNS_ROCE_EQ_STAT_INVALID = 0, + HNS_ROCE_EQ_STAT_VALID = 2, +}; + +struct hns_roce_aeqe { + u32 asyn; + union { + struct { + u32 qp; + u32 rsv0; + u32 rsv1; + } qp_event; + + struct { + u32 cq; + u32 rsv0; + u32 rsv1; + } cq_event; + + struct { + u32 port; + u32 rsv0; + u32 rsv1; + } port_event; + + struct { + u32 ceqe; + u32 rsv0; + u32 rsv1; + } ce_event; + + struct { + __le64 out_param; + __le16 token; + u8 status; + u8 rsv0; + } __packed cmd; + } event; +}; + +#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16 +#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M \ + (((1UL << 8) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S) + +#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24 +#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M \ + (((1UL << 7) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S) + +#define HNS_ROCE_AEQE_U32_4_OWNER_S 31 + +#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0 +#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M \ + (((1UL << 24) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S) + +#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0 +#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M \ + (((1UL << 16) - 1) << HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S) + +#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 +#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M \ + (((1UL << 5) - 1) << HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S) + +struct hns_roce_ceqe { + union { + int comp; + } ceqe; +}; + +#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0 + +#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16 +#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M \ + (((1UL << 16) - 1) << HNS_ROCE_CEQE_CEQE_COMP_CQN_S) + +#endif /* _HNS_ROCE_EQ_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c new file mode 100644 index 00000000000000..d53d643623899e --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "hns_roce_device.h" +#include "hns_roce_hem.h" +#include "hns_roce_common.h" + +#define HW_SYNC_TIMEOUT_MSECS 500 +#define HW_SYNC_SLEEP_TIME_INTERVAL 20 + +#define HNS_ROCE_HEM_ALLOC_SIZE (1 << 17) +#define HNS_ROCE_TABLE_CHUNK_SIZE (1 << 17) + +#define DMA_ADDR_T_SHIFT 12 +#define BT_CMD_SYNC_SHIFT 31 +#define BT_BA_SHIFT 32 + +struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, + gfp_t gfp_mask) +{ + struct hns_roce_hem_chunk *chunk = NULL; + struct hns_roce_hem *hem; + struct scatterlist *mem; + int order; + void *buf; + + WARN_ON(gfp_mask & __GFP_HIGHMEM); + + hem = kmalloc(sizeof(*hem), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!hem) + return NULL; + + hem->refcount = 0; + INIT_LIST_HEAD(&hem->chunk_list); + + order = get_order(HNS_ROCE_HEM_ALLOC_SIZE); + + while (npages > 0) { + if (!chunk) { + chunk = kmalloc(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!chunk) + goto fail; + + sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN); + chunk->npages = 0; + chunk->nsg = 0; + list_add_tail(&chunk->list, &hem->chunk_list); + } + + while (1 << order > npages) + --order; + + /* + * Alloc memory one time. If failed, don't alloc small block + * memory, directly return fail. + */ + mem = &chunk->mem[chunk->npages]; + buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order, + &sg_dma_address(mem), gfp_mask); + if (!buf) + goto fail; + + sg_set_buf(mem, buf, PAGE_SIZE << order); + WARN_ON(mem->offset); + sg_dma_len(mem) = PAGE_SIZE << order; + + ++chunk->npages; + ++chunk->nsg; + npages -= 1 << order; + } + + return hem; + +fail: + hns_roce_free_hem(hr_dev, hem); + return NULL; +} + +void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) +{ + struct hns_roce_hem_chunk *chunk, *tmp; + int i; + + if (!hem) + return; + + list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) { + for (i = 0; i < chunk->npages; ++i) + dma_free_coherent(&hr_dev->pdev->dev, + chunk->mem[i].length, + lowmem_page_address(sg_page(&chunk->mem[i])), + sg_dma_address(&chunk->mem[i])); + kfree(chunk); + } + + kfree(hem); +} + +static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + spinlock_t *lock = &hr_dev->bt_cmd_lock; + unsigned long end = 0; + unsigned long flags; + struct hns_roce_hem_iter iter; + void __iomem *bt_cmd; + u32 bt_cmd_h_val = 0; + u32 bt_cmd_val[2]; + u32 bt_cmd_l = 0; + u64 bt_ba = 0; + int ret = 0; + + /* Find the HEM(Hardware Entry Memory) entry */ + unsigned long i = (obj & (table->num_obj - 1)) / + (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); + + switch (table->type) { + case HEM_TYPE_QPC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC); + break; + case HEM_TYPE_MTPT: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, + HEM_TYPE_MTPT); + break; + case HEM_TYPE_CQC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC); + break; + case HEM_TYPE_SRQC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, + HEM_TYPE_SRQC); + break; + default: + return ret; + } + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); + roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + + /* Currently iter only a chunk */ + for (hns_roce_hem_first(table->hem[i], &iter); + !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { + bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT; + + spin_lock_irqsave(lock, flags); + + bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; + + end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies; + while (1) { + if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { + if (!(time_before(jiffies, end))) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(lock, flags); + return -EBUSY; + } + } else { + break; + } + msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + } + + bt_cmd_l = (u32)bt_ba; + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, + bt_ba >> BT_BA_SHIFT); + + bt_cmd_val[0] = bt_cmd_l; + bt_cmd_val[1] = bt_cmd_h_val; + hns_roce_write64_k(bt_cmd_val, + hr_dev->reg_base + ROCEE_BT_CMD_L_REG); + spin_unlock_irqrestore(lock, flags); + } + + return ret; +} + +static int hns_roce_clear_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + unsigned long end = 0; + unsigned long flags; + void __iomem *bt_cmd; + uint32_t bt_cmd_val[2]; + u32 bt_cmd_h_val = 0; + int ret = 0; + + switch (table->type) { + case HEM_TYPE_QPC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC); + break; + case HEM_TYPE_MTPT: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, + HEM_TYPE_MTPT); + break; + case HEM_TYPE_CQC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC); + break; + case HEM_TYPE_SRQC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, + HEM_TYPE_SRQC); + break; + default: + return ret; + } + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); + roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, 0); + + spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags); + + bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; + + end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies; + while (1) { + if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { + if (!(time_before(jiffies, end))) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, + flags); + return -EBUSY; + } + } else { + break; + } + msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + } + + bt_cmd_val[0] = 0; + bt_cmd_val[1] = bt_cmd_h_val; + hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); + spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags); + + return ret; +} + +int hns_roce_table_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + int ret = 0; + unsigned long i; + + i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE / + table->obj_size); + + mutex_lock(&table->mutex); + + if (table->hem[i]) { + ++table->hem[i]->refcount; + goto out; + } + + table->hem[i] = hns_roce_alloc_hem(hr_dev, + HNS_ROCE_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + (table->lowmem ? GFP_KERNEL : + GFP_HIGHUSER) | __GFP_NOWARN); + if (!table->hem[i]) { + ret = -ENOMEM; + goto out; + } + + /* Set HEM base address(128K/page, pa) to Hardware */ + if (hns_roce_set_hem(hr_dev, table, obj)) { + ret = -ENODEV; + dev_err(dev, "set HEM base address to HW failed.\n"); + goto out; + } + + ++table->hem[i]->refcount; +out: + mutex_unlock(&table->mutex); + return ret; +} + +void hns_roce_table_put(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + unsigned long i; + + i = (obj & (table->num_obj - 1)) / + (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); + + mutex_lock(&table->mutex); + + if (--table->hem[i]->refcount == 0) { + /* Clear HEM base address */ + if (hns_roce_clear_hem(hr_dev, table, obj)) + dev_warn(dev, "Clear HEM base address failed.\n"); + + hns_roce_free_hem(hr_dev, table->hem[i]); + table->hem[i] = NULL; + } + + mutex_unlock(&table->mutex); +} + +void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, + dma_addr_t *dma_handle) +{ + struct hns_roce_hem_chunk *chunk; + unsigned long idx; + int i; + int offset, dma_offset; + struct hns_roce_hem *hem; + struct page *page = NULL; + + if (!table->lowmem) + return NULL; + + mutex_lock(&table->mutex); + idx = (obj & (table->num_obj - 1)) * table->obj_size; + hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE; + + if (!hem) + goto out; + + list_for_each_entry(chunk, &hem->chunk_list, list) { + for (i = 0; i < chunk->npages; ++i) { + if (dma_handle && dma_offset >= 0) { + if (sg_dma_len(&chunk->mem[i]) > + (u32)dma_offset) + *dma_handle = sg_dma_address( + &chunk->mem[i]) + dma_offset; + dma_offset -= sg_dma_len(&chunk->mem[i]); + } + + if (chunk->mem[i].length > (u32)offset) { + page = sg_page(&chunk->mem[i]); + goto out; + } + offset -= chunk->mem[i].length; + } + } + +out: + mutex_unlock(&table->mutex); + return page ? lowmem_page_address(page) + offset : NULL; +} + +int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end) +{ + unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size; + unsigned long i = 0; + int ret = 0; + + /* Allocate MTT entry memory according to chunk(128K) */ + for (i = start; i <= end; i += inc) { + ret = hns_roce_table_get(hr_dev, table, i); + if (ret) + goto fail; + } + + return 0; + +fail: + while (i > start) { + i -= inc; + hns_roce_table_put(hr_dev, table, i); + } + return ret; +} + +void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end) +{ + unsigned long i; + + for (i = start; i <= end; + i += HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size) + hns_roce_table_put(hr_dev, table, i); +} + +int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, u32 type, + unsigned long obj_size, unsigned long nobj, + int use_lowmem) +{ + unsigned long obj_per_chunk; + unsigned long num_hem; + + obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size; + num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; + + table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL); + if (!table->hem) + return -ENOMEM; + + table->type = type; + table->num_hem = num_hem; + table->num_obj = nobj; + table->obj_size = obj_size; + table->lowmem = use_lowmem; + mutex_init(&table->mutex); + + return 0; +} + +void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table) +{ + struct device *dev = &hr_dev->pdev->dev; + unsigned long i; + + for (i = 0; i < table->num_hem; ++i) + if (table->hem[i]) { + if (hns_roce_clear_hem(hr_dev, table, + i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size)) + dev_err(dev, "Clear HEM base address failed.\n"); + + hns_roce_free_hem(hr_dev, table->hem[i]); + } + + kfree(table->hem); +} + +void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) +{ + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h new file mode 100644 index 00000000000000..ad6617588fba9a --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_HEM_H +#define _HNS_ROCE_HEM_H + +enum { + /* MAP HEM(Hardware Entry Memory) */ + HEM_TYPE_QPC = 0, + HEM_TYPE_MTPT, + HEM_TYPE_CQC, + HEM_TYPE_SRQC, + + /* UNMAP HEM */ + HEM_TYPE_MTT, + HEM_TYPE_IRRL, +}; + +#define HNS_ROCE_HEM_CHUNK_LEN \ + ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ + (sizeof(struct scatterlist))) + +enum { + HNS_ROCE_HEM_PAGE_SHIFT = 12, + HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT, +}; + +struct hns_roce_hem_chunk { + struct list_head list; + int npages; + int nsg; + struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN]; +}; + +struct hns_roce_hem { + struct list_head chunk_list; + int refcount; +}; + +struct hns_roce_hem_iter { + struct hns_roce_hem *hem; + struct hns_roce_hem_chunk *chunk; + int page_idx; +}; + +void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem); +int hns_roce_table_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj); +void hns_roce_table_put(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj); +void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, + dma_addr_t *dma_handle); +int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end); +void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end); +int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, u32 type, + unsigned long obj_size, unsigned long nobj, + int use_lowmem); +void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table); +void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev); + +static inline void hns_roce_hem_first(struct hns_roce_hem *hem, + struct hns_roce_hem_iter *iter) +{ + iter->hem = hem; + iter->chunk = list_empty(&hem->chunk_list) ? NULL : + list_entry(hem->chunk_list.next, + struct hns_roce_hem_chunk, list); + iter->page_idx = 0; +} + +static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter) +{ + return !iter->chunk; +} + +static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter) +{ + if (++iter->page_idx >= iter->chunk->nsg) { + if (iter->chunk->list.next == &iter->hem->chunk_list) { + iter->chunk = NULL; + return; + } + + iter->chunk = list_entry(iter->chunk->list.next, + struct hns_roce_hem_chunk, list); + iter->page_idx = 0; + } +} + +static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) +{ + return sg_dma_address(&iter->chunk->mem[iter->page_idx]); +} + +#endif /*_HNS_ROCE_HEM_H*/ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c new file mode 100644 index 00000000000000..b52f3badf855c5 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -0,0 +1,2781 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" +#include "hns_roce_hw_v1.h" + +static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->lkey = cpu_to_le32(sg->lkey); + dseg->addr = cpu_to_le64(sg->addr); + dseg->len = cpu_to_le32(sg->length); +} + +static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, + u32 rkey) +{ + rseg->raddr = cpu_to_le64(remote_addr); + rseg->rkey = cpu_to_le32(rkey); + rseg->len = 0; +} + +int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); + struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL; + struct hns_roce_wqe_ctrl_seg *ctrl = NULL; + struct hns_roce_wqe_data_seg *dseg = NULL; + struct hns_roce_qp *qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_sq_db sq_db; + int ps_opcode = 0, i = 0; + unsigned long flags = 0; + void *wqe = NULL; + u32 doorbell[2]; + int nreq = 0; + u32 ind = 0; + int ret = 0; + + spin_lock_irqsave(&qp->sq.lock, flags); + + ind = qp->sq_next_wqe; + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + ret = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, qp->sq.max_gs); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = + wr->wr_id; + + /* Corresponding to the RC and RD type wqe process separately */ + if (ibqp->qp_type == IB_QPT_GSI) { + ud_sq_wqe = wqe; + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_0_M, + UD_SEND_WQE_U32_4_DMAC_0_S, + ah->av.mac[0]); + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_1_M, + UD_SEND_WQE_U32_4_DMAC_1_S, + ah->av.mac[1]); + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_2_M, + UD_SEND_WQE_U32_4_DMAC_2_S, + ah->av.mac[2]); + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_3_M, + UD_SEND_WQE_U32_4_DMAC_3_S, + ah->av.mac[3]); + + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_DMAC_4_M, + UD_SEND_WQE_U32_8_DMAC_4_S, + ah->av.mac[4]); + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_DMAC_5_M, + UD_SEND_WQE_U32_8_DMAC_5_S, + ah->av.mac[5]); + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_OPERATION_TYPE_M, + UD_SEND_WQE_U32_8_OPERATION_TYPE_S, + HNS_ROCE_WQE_OPCODE_SEND); + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M, + UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S, + 2); + roce_set_bit(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S, + 1); + + ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ? + cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + cpu_to_le32(HNS_ROCE_WQE_SE) : 0) | + ((wr->opcode == IB_WR_SEND_WITH_IMM) ? + cpu_to_le32(HNS_ROCE_WQE_IMM) : 0); + + roce_set_field(ud_sq_wqe->u32_16, + UD_SEND_WQE_U32_16_DEST_QP_M, + UD_SEND_WQE_U32_16_DEST_QP_S, + ud_wr(wr)->remote_qpn); + roce_set_field(ud_sq_wqe->u32_16, + UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M, + UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S, + ah->av.stat_rate); + + roce_set_field(ud_sq_wqe->u32_36, + UD_SEND_WQE_U32_36_FLOW_LABEL_M, + UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); + roce_set_field(ud_sq_wqe->u32_36, + UD_SEND_WQE_U32_36_PRIORITY_M, + UD_SEND_WQE_U32_36_PRIORITY_S, + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_SL_SHIFT); + roce_set_field(ud_sq_wqe->u32_36, + UD_SEND_WQE_U32_36_SGID_INDEX_M, + UD_SEND_WQE_U32_36_SGID_INDEX_S, + hns_get_gid_index(hr_dev, qp->port, + ah->av.gid_index)); + + roce_set_field(ud_sq_wqe->u32_40, + UD_SEND_WQE_U32_40_HOP_LIMIT_M, + UD_SEND_WQE_U32_40_HOP_LIMIT_S, + ah->av.hop_limit); + roce_set_field(ud_sq_wqe->u32_40, + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0); + + memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); + + ud_sq_wqe->va0_l = (u32)wr->sg_list[0].addr; + ud_sq_wqe->va0_h = (wr->sg_list[0].addr) >> 32; + ud_sq_wqe->l_key0 = wr->sg_list[0].lkey; + + ud_sq_wqe->va1_l = (u32)wr->sg_list[1].addr; + ud_sq_wqe->va1_h = (wr->sg_list[1].addr) >> 32; + ud_sq_wqe->l_key1 = wr->sg_list[1].lkey; + ind++; + } else if (ibqp->qp_type == IB_QPT_RC) { + ctrl = wqe; + memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg)); + for (i = 0; i < wr->num_sge; i++) + ctrl->msg_length += wr->sg_list[i].length; + + ctrl->sgl_pa_h = 0; + ctrl->flag = 0; + ctrl->imm_data = send_ieth(wr); + + /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* SO wait for conforming application scenarios */ + ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? + cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + cpu_to_le32(HNS_ROCE_WQE_SE) : 0) | + ((wr->opcode == IB_WR_SEND_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ? + cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) | + (wr->send_flags & IB_SEND_FENCE ? + (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0); + + wqe = (struct hns_roce_wqe_ctrl_seg *)wqe + + sizeof(struct hns_roce_wqe_ctrl_seg); + + switch (wr->opcode) { + case IB_WR_RDMA_READ: + ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); + break; + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + case IB_WR_SEND_WITH_IMM: + ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; + break; + case IB_WR_LOCAL_INV: + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_LSO: + default: + ps_opcode = HNS_ROCE_WQE_OPCODE_MASK; + break; + } + ctrl->flag |= cpu_to_le32(ps_opcode); + wqe = (struct hns_roce_wqe_raddr_seg *)wqe + + sizeof(struct hns_roce_wqe_raddr_seg); + + dseg = wqe; + if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (ctrl->msg_length > + hr_dev->caps.max_sq_inline) { + ret = -EINVAL; + *bad_wr = wr; + dev_err(dev, "inline len(1-%d)=%d, illegal", + ctrl->msg_length, + hr_dev->caps.max_sq_inline); + goto out; + } + for (i = 0; i < wr->num_sge; i++) { + memcpy(wqe, ((void *) (uintptr_t) + wr->sg_list[i].addr), + wr->sg_list[i].length); + wqe = (struct hns_roce_wqe_raddr_seg *) + wqe + wr->sg_list[i].length; + } + ctrl->flag |= HNS_ROCE_WQE_INLINE; + } else { + /*sqe num is two */ + for (i = 0; i < wr->num_sge; i++) + set_data_seg(dseg + i, wr->sg_list + i); + + ctrl->flag |= cpu_to_le32(wr->num_sge << + HNS_ROCE_WQE_SGE_NUM_BIT); + } + ind++; + } else { + dev_dbg(dev, "unSupported QP type\n"); + break; + } + } + +out: + /* Set DB return */ + if (likely(nreq)) { + qp->sq.head += nreq; + /* Memory barrier */ + wmb(); + + sq_db.u32_4 = 0; + sq_db.u32_8 = 0; + roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, + SQ_DOORBELL_U32_4_SQ_HEAD_S, + (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1))); + roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M, + SQ_DOORBELL_U32_4_PORT_S, qp->port); + roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M, + SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn); + roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1); + + doorbell[0] = sq_db.u32_4; + doorbell[1] = sq_db.u32_8; + + hns_roce_write64_k(doorbell, qp->sq.db_reg_l); + qp->sq_next_wqe = ind; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return ret; +} + +int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int ret = 0; + int nreq = 0; + int ind = 0; + int i = 0; + u32 reg_val = 0; + unsigned long flags = 0; + struct hns_roce_rq_wqe_ctrl *ctrl = NULL; + struct hns_roce_wqe_data_seg *scat = NULL; + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_rq_db rq_db; + uint32_t doorbell[2] = {0}; + + spin_lock_irqsave(&hr_qp->rq.lock, flags); + ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&hr_qp->rq, nreq, + hr_qp->ibqp.recv_cq)) { + ret = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, hr_qp->rq.max_gs); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + ctrl = get_recv_wqe(hr_qp, ind); + + roce_set_field(ctrl->rwqe_byte_12, + RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, + RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S, + wr->num_sge); + + scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1); + + for (i = 0; i < wr->num_sge; i++) + set_data_seg(scat + i, wr->sg_list + i); + + hr_qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + hr_qp->rq.head += nreq; + /* Memory barrier */ + wmb(); + + if (ibqp->qp_type == IB_QPT_GSI) { + /* SW update GSI rq header */ + reg_val = roce_read(to_hr_dev(ibqp->device), + ROCEE_QP1C_CFG3_0_REG + + QP1C_CFGN_OFFSET * hr_qp->port); + roce_set_field(reg_val, + ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, + ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, + hr_qp->rq.head); + roce_write(to_hr_dev(ibqp->device), + ROCEE_QP1C_CFG3_0_REG + + QP1C_CFGN_OFFSET * hr_qp->port, reg_val); + } else { + rq_db.u32_4 = 0; + rq_db.u32_8 = 0; + + roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M, + RQ_DOORBELL_U32_4_RQ_HEAD_S, + hr_qp->rq.head); + roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M, + RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn); + roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M, + RQ_DOORBELL_U32_8_CMD_S, 1); + roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S, + 1); + + doorbell[0] = rq_db.u32_4; + doorbell[1] = rq_db.u32_8; + + hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + } + } + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + + return ret; +} + +static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, + int sdb_mode, int odb_mode) +{ + u32 val; + + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); + roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); +} + +static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, + u32 odb_mode) +{ + u32 val; + + /* Configure SDB/ODB extend mode */ + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); + roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); +} + +static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, + u32 sdb_alful) +{ + u32 val; + + /* Configure SDB */ + val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); + roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, + ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); + roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, + ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); + roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); +} + +static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, + u32 odb_alful) +{ + u32 val; + + /* Configure ODB */ + val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); + roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, + ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); + roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, + ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); + roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); +} + +static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, + u32 ext_sdb_alful) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + dma_addr_t sdb_dma_addr; + u32 val; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + /* Configure extend SDB threshold */ + roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept); + roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful); + + /* Configure extend SDB base addr */ + sdb_dma_addr = db->ext_db->sdb_buf_list->map; + roce_write(hr_dev, ROCEE_EXT_DB_SQ_REG, (u32)(sdb_dma_addr >> 12)); + + /* Configure extend SDB depth */ + val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); + roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, + ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, + db->ext_db->esdb_dep); + /* + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, + ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); + roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); + + dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); + dev_dbg(dev, "ext SDB threshold: epmty: 0x%x, ful: 0x%x\n", + ext_sdb_alept, ext_sdb_alful); +} + +static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, + u32 ext_odb_alful) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + dma_addr_t odb_dma_addr; + u32 val; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + /* Configure extend ODB threshold */ + roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept); + roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful); + + /* Configure extend ODB base addr */ + odb_dma_addr = db->ext_db->odb_buf_list->map; + roce_write(hr_dev, ROCEE_EXT_DB_OTH_REG, (u32)(odb_dma_addr >> 12)); + + /* Configure extend ODB depth */ + val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); + roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, + ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, + db->ext_db->eodb_dep); + roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, + ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, + db->ext_db->eodb_dep); + roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); + + dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); + dev_dbg(dev, "ext ODB threshold: empty: 0x%x, ful: 0x%x\n", + ext_odb_alept, ext_odb_alful); +} + +static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod, + u32 odb_ext_mod) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + dma_addr_t sdb_dma_addr; + dma_addr_t odb_dma_addr; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL); + if (!db->ext_db) + return -ENOMEM; + + if (sdb_ext_mod) { + db->ext_db->sdb_buf_list = kmalloc( + sizeof(*db->ext_db->sdb_buf_list), GFP_KERNEL); + if (!db->ext_db->sdb_buf_list) { + ret = -ENOMEM; + goto ext_sdb_buf_fail_out; + } + + db->ext_db->sdb_buf_list->buf = dma_alloc_coherent(dev, + HNS_ROCE_V1_EXT_SDB_SIZE, + &sdb_dma_addr, GFP_KERNEL); + if (!db->ext_db->sdb_buf_list->buf) { + ret = -ENOMEM; + goto alloc_sq_db_buf_fail; + } + db->ext_db->sdb_buf_list->map = sdb_dma_addr; + + db->ext_db->esdb_dep = ilog2(HNS_ROCE_V1_EXT_SDB_DEPTH); + hns_roce_set_sdb_ext(hr_dev, HNS_ROCE_V1_EXT_SDB_ALEPT, + HNS_ROCE_V1_EXT_SDB_ALFUL); + } else + hns_roce_set_sdb(hr_dev, HNS_ROCE_V1_SDB_ALEPT, + HNS_ROCE_V1_SDB_ALFUL); + + if (odb_ext_mod) { + db->ext_db->odb_buf_list = kmalloc( + sizeof(*db->ext_db->odb_buf_list), GFP_KERNEL); + if (!db->ext_db->odb_buf_list) { + ret = -ENOMEM; + goto ext_odb_buf_fail_out; + } + + db->ext_db->odb_buf_list->buf = dma_alloc_coherent(dev, + HNS_ROCE_V1_EXT_ODB_SIZE, + &odb_dma_addr, GFP_KERNEL); + if (!db->ext_db->odb_buf_list->buf) { + ret = -ENOMEM; + goto alloc_otr_db_buf_fail; + } + db->ext_db->odb_buf_list->map = odb_dma_addr; + + db->ext_db->eodb_dep = ilog2(HNS_ROCE_V1_EXT_ODB_DEPTH); + hns_roce_set_odb_ext(hr_dev, HNS_ROCE_V1_EXT_ODB_ALEPT, + HNS_ROCE_V1_EXT_ODB_ALFUL); + } else + hns_roce_set_odb(hr_dev, HNS_ROCE_V1_ODB_ALEPT, + HNS_ROCE_V1_ODB_ALFUL); + + hns_roce_set_db_ext_mode(hr_dev, sdb_ext_mod, odb_ext_mod); + + return 0; + +alloc_otr_db_buf_fail: + kfree(db->ext_db->odb_buf_list); + +ext_odb_buf_fail_out: + if (sdb_ext_mod) { + dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, + db->ext_db->sdb_buf_list->buf, + db->ext_db->sdb_buf_list->map); + } + +alloc_sq_db_buf_fail: + if (sdb_ext_mod) + kfree(db->ext_db->sdb_buf_list); + +ext_sdb_buf_fail_out: + kfree(db->ext_db); + return ret; +} + +static int hns_roce_db_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + u32 sdb_ext_mod; + u32 odb_ext_mod; + u32 sdb_evt_mod; + u32 odb_evt_mod; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + memset(db, 0, sizeof(*db)); + + /* Default DB mode */ + sdb_ext_mod = HNS_ROCE_SDB_EXTEND_MODE; + odb_ext_mod = HNS_ROCE_ODB_EXTEND_MODE; + sdb_evt_mod = HNS_ROCE_SDB_NORMAL_MODE; + odb_evt_mod = HNS_ROCE_ODB_POLL_MODE; + + db->sdb_ext_mod = sdb_ext_mod; + db->odb_ext_mod = odb_ext_mod; + + /* Init extend DB */ + ret = hns_roce_db_ext_init(hr_dev, sdb_ext_mod, odb_ext_mod); + if (ret) { + dev_err(dev, "Failed in extend DB configuration.\n"); + return ret; + } + + hns_roce_set_db_event_mode(hr_dev, sdb_evt_mod, odb_evt_mod); + + return 0; +} + +static void hns_roce_db_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + if (db->sdb_ext_mod) { + dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, + db->ext_db->sdb_buf_list->buf, + db->ext_db->sdb_buf_list->map); + kfree(db->ext_db->sdb_buf_list); + } + + if (db->odb_ext_mod) { + dma_free_coherent(dev, HNS_ROCE_V1_EXT_ODB_SIZE, + db->ext_db->odb_buf_list->buf, + db->ext_db->odb_buf_list->map); + kfree(db->ext_db->odb_buf_list); + } + + kfree(db->ext_db); +} + +static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) +{ + int ret; + int raq_shift = 0; + dma_addr_t addr; + u32 val; + struct hns_roce_v1_priv *priv; + struct hns_roce_raq_table *raq; + struct device *dev = &hr_dev->pdev->dev; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + raq = &priv->raq_table; + + raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL); + if (!raq->e_raq_buf) + return -ENOMEM; + + raq->e_raq_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, + &addr, GFP_KERNEL); + if (!raq->e_raq_buf->buf) { + ret = -ENOMEM; + goto err_dma_alloc_raq; + } + raq->e_raq_buf->map = addr; + + /* Configure raq extended address. 48bit 4K align*/ + roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); + + /* Configure raq_shift */ + raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); + val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); + roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, + ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); + /* + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, + ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, + raq->e_raq_buf->map >> 44); + roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); + dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); + + /* Configure raq threshold */ + val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); + roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, + ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, + HNS_ROCE_V1_EXT_RAQ_WF); + roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); + dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); + + /* Enable extend raq */ + val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); + roce_set_field(val, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, + POL_TIME_INTERVAL_VAL); + roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); + roce_set_field(val, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, + 2); + roce_set_bit(val, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); + roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); + dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); + + /* Enable raq drop */ + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); + dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); + + return 0; + +err_dma_alloc_raq: + kfree(raq->e_raq_buf); + return ret; +} + +static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_raq_table *raq; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + raq = &priv->raq_table; + + dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf, + raq->e_raq_buf->map); + kfree(raq->e_raq_buf); +} + +static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) +{ + u32 val; + + if (enable_flag) { + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + /* Open all ports */ + roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + ROCEE_GLB_CFG_ROCEE_PORT_ST_S, + ALL_PORT_VAL_OPEN); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); + } else { + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + /* Close all ports */ + roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); + } +} + +/** + * hns_roce_v1_reset - reset RoCE + * @hr_dev: RoCE device struct pointer + * @enable: true -- drop reset, false -- reset + * return 0 - success , negative --fail + */ +int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool enable) +{ + struct device_node *dsaf_node; + struct device *dev = &hr_dev->pdev->dev; + struct device_node *np = dev->of_node; + int ret; + + dsaf_node = of_parse_phandle(np, "dsaf-handle", 0); + if (!dsaf_node) { + dev_err(dev, "Unable to get dsaf node by dsaf-handle!\n"); + return -EINVAL; + } + + ret = hns_dsaf_roce_reset(&dsaf_node->fwnode, false); + if (ret) + return ret; + + if (enable) { + msleep(SLEEP_TIME_INTERVAL); + return hns_dsaf_roce_reset(&dsaf_node->fwnode, true); + } + + return 0; +} + +void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) +{ + int i = 0; + struct hns_roce_caps *caps = &hr_dev->caps; + + hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); + hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, + ROCEE_VENDOR_PART_ID_REG)); + hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG)); + + hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_L_REG)) | + ((u64)le32_to_cpu(roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + + caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; + caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; + caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM; + caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM; + caps->max_sq_sg = HNS_ROCE_V1_SG_NUM; + caps->max_rq_sg = HNS_ROCE_V1_SG_NUM; + caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE; + caps->num_uars = HNS_ROCE_V1_UAR_NUM; + caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM; + caps->num_aeq_vectors = HNS_ROCE_AEQE_VEC_NUM; + caps->num_comp_vectors = HNS_ROCE_COMP_VEC_NUM; + caps->num_other_vectors = HNS_ROCE_AEQE_OF_VEC_NUM; + caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM; + caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS; + caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM; + caps->max_qp_init_rdma = HNS_ROCE_V1_MAX_QP_INIT_RDMA; + caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA; + caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ; + caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ; + caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE; + caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE; + caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE; + caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE; + caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE; + caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE; + caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT; + caps->sqp_start = 0; + caps->reserved_lkey = 0; + caps->reserved_pds = 0; + caps->reserved_mrws = 1; + caps->reserved_uars = 0; + caps->reserved_cqs = 0; + + for (i = 0; i < caps->num_ports; i++) + caps->pkey_table_len[i] = 1; + + for (i = 0; i < caps->num_ports; i++) { + /* Six ports shared 16 GID in v1 engine */ + if (i >= (HNS_ROCE_V1_GID_NUM % caps->num_ports)) + caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM / + caps->num_ports; + else + caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM / + caps->num_ports + 1; + } + + for (i = 0; i < caps->num_comp_vectors; i++) + caps->ceqe_depth[i] = HNS_ROCE_V1_NUM_COMP_EQE; + + caps->aeqe_depth = HNS_ROCE_V1_NUM_ASYNC_EQE; + caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, + ROCEE_ACK_DELAY_REG)); + caps->max_mtu = IB_MTU_2048; +} + +int hns_roce_v1_init(struct hns_roce_dev *hr_dev) +{ + int ret; + u32 val; + struct device *dev = &hr_dev->pdev->dev; + + /* DMAE user config */ + val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); + roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, + ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); + roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, + ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, + 1 << PAGES_SHIFT_16); + roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); + + val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); + roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, + ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); + roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, + ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, + 1 << PAGES_SHIFT_16); + + ret = hns_roce_db_init(hr_dev); + if (ret) { + dev_err(dev, "doorbell init failed!\n"); + return ret; + } + + ret = hns_roce_raq_init(hr_dev); + if (ret) { + dev_err(dev, "raq init failed!\n"); + goto error_failed_raq_init; + } + + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); + + return 0; + +error_failed_raq_init: + hns_roce_db_free(hr_dev); + return ret; +} + +void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) +{ + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); + hns_roce_raq_free(hr_dev); + hns_roce_db_free(hr_dev); +} + +void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid) +{ + u32 *p = NULL; + u8 gid_idx = 0; + + gid_idx = hns_get_gid_index(hr_dev, port, gid_index); + + p = (u32 *)&gid->raw[0]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + p = (u32 *)&gid->raw[4]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_ML_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + p = (u32 *)&gid->raw[8]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_MH_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + p = (u32 *)&gid->raw[0xc]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); +} + +void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) +{ + u32 reg_smac_l; + u16 reg_smac_h; + u16 *p_h; + u32 *p; + u32 val; + + p = (u32 *)(&addr[0]); + reg_smac_l = *p; + roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG + + PHY_PORT_OFFSET * phy_port); + + val = roce_read(hr_dev, + ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + p_h = (u16 *)(&addr[4]); + reg_smac_h = *p_h; + roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, + ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); + roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, + val); +} + +void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, + enum ib_mtu mtu) +{ + u32 val; + + val = roce_read(hr_dev, + ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, + ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); + roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, + val); +} + +int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, + unsigned long mtpt_idx) +{ + struct hns_roce_v1_mpt_entry *mpt_entry; + struct scatterlist *sg; + u64 *pages; + int entry; + int i; + + /* MPT filled into mailbox buf */ + mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf; + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M, + MPT_BYTE_4_KEY_STATE_S, KEY_VALID); + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M, + MPT_BYTE_4_KEY_S, mr->key); + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M, + MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S, + (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0); + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M, + MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S, + (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S, + (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S, + (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S, + 0); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0); + + roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, + MPT_BYTE_12_PBL_ADDR_H_S, 0); + roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, + MPT_BYTE_12_MW_BIND_COUNTER_S, 0); + + mpt_entry->virt_addr_l = (u32)mr->iova; + mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); + mpt_entry->length = (u32)mr->size; + + roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, + MPT_BYTE_28_PD_S, mr->pd); + roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M, + MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx); + roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M, + MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT); + + /* DMA momery regsiter */ + if (mr->type == MR_TYPE_DMA) + return 0; + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = 0; + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + pages[i] = ((u64)sg_dma_address(sg)) >> 12; + + /* Directly record to MTPT table firstly 7 entry */ + if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM) + break; + i++; + } + + /* Register user mr */ + for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) { + switch (i) { + case 0: + mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i])); + roce_set_field(mpt_entry->mpt_byte_36, + MPT_BYTE_36_PA0_H_M, + MPT_BYTE_36_PA0_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + break; + case 1: + roce_set_field(mpt_entry->mpt_byte_36, + MPT_BYTE_36_PA1_L_M, + MPT_BYTE_36_PA1_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_40, + MPT_BYTE_40_PA1_H_M, + MPT_BYTE_40_PA1_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + break; + case 2: + roce_set_field(mpt_entry->mpt_byte_40, + MPT_BYTE_40_PA2_L_M, + MPT_BYTE_40_PA2_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_44, + MPT_BYTE_44_PA2_H_M, + MPT_BYTE_44_PA2_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + break; + case 3: + roce_set_field(mpt_entry->mpt_byte_44, + MPT_BYTE_44_PA3_L_M, + MPT_BYTE_44_PA3_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_48, + MPT_BYTE_48_PA3_H_M, + MPT_BYTE_48_PA3_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); + break; + case 4: + mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); + roce_set_field(mpt_entry->mpt_byte_56, + MPT_BYTE_56_PA4_H_M, + MPT_BYTE_56_PA4_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + break; + case 5: + roce_set_field(mpt_entry->mpt_byte_56, + MPT_BYTE_56_PA5_L_M, + MPT_BYTE_56_PA5_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_60, + MPT_BYTE_60_PA5_H_M, + MPT_BYTE_60_PA5_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + break; + case 6: + roce_set_field(mpt_entry->mpt_byte_60, + MPT_BYTE_60_PA6_L_M, + MPT_BYTE_60_PA6_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_64, + MPT_BYTE_64_PA6_H_M, + MPT_BYTE_64_PA6_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + break; + default: + break; + } + } + + free_page((unsigned long) pages); + + mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); + + roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, + MPT_BYTE_12_PBL_ADDR_H_S, + ((u32)(mr->pbl_dma_addr >> 32))); + + return 0; +} + +static void *get_cqe(struct hns_roce_cq *hr_cq, int n) +{ + return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, + n * HNS_ROCE_V1_CQE_ENTRY_SIZE); +} + +static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) +{ + struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe); + + /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ + return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ + !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL; +} + +static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) +{ + return get_sw_cqe(hr_cq, hr_cq->cons_index); +} + +void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index, + spinlock_t *doorbell_lock) + +{ + u32 doorbell[2]; + + doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); + roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn); + + hns_roce_write64_k(doorbell, hr_cq->cq_db_l); +} + +static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + struct hns_roce_srq *srq) +{ + struct hns_roce_cqe *cqe, *dest; + u32 prod_index; + int nfreed = 0; + u8 owner_bit; + + for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index); + ++prod_index) { + if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe) + break; + } + + /* + * Now backwards through the CQ, removing CQ entries + * that match our QP by overwriting them with next entries. + */ + while ((int) --prod_index - (int) hr_cq->cons_index >= 0) { + cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe); + if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) & + HNS_ROCE_CQE_QPN_MASK) == qpn) { + /* In v1 engine, not support SRQ */ + ++nfreed; + } else if (nfreed) { + dest = get_cqe(hr_cq, (prod_index + nfreed) & + hr_cq->ib_cq.cqe); + owner_bit = roce_get_bit(dest->cqe_byte_4, + CQE_BYTE_4_OWNER_S); + memcpy(dest, cqe, sizeof(*cqe)); + roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S, + owner_bit); + } + } + + if (nfreed) { + hr_cq->cons_index += nfreed; + /* + * Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + + hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index, + &to_hr_dev(hr_cq->ib_cq.device)->cq_db_lock); + } +} + +static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + struct hns_roce_srq *srq) +{ + spin_lock_irq(&hr_cq->lock); + __hns_roce_v1_cq_clean(hr_cq, qpn, srq); + spin_unlock_irq(&hr_cq->lock); +} + +void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, + dma_addr_t dma_handle, int nent, u32 vector) +{ + struct hns_roce_cq_context *cq_context = NULL; + void __iomem *tptr_addr; + + cq_context = mb_buf; + memset(cq_context, 0, sizeof(*cq_context)); + + tptr_addr = 0; + hr_dev->priv_addr = tptr_addr; + hr_cq->tptr_addr = tptr_addr; + + /* Register cq_context members */ + roce_set_field(cq_context->cqc_byte_4, + CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M, + CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); + roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, + CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); + cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4); + + cq_context->cq_bt_l = (u32)dma_handle; + cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l); + + roce_set_field(cq_context->cqc_byte_12, + CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, + CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S, + ((u64)dma_handle >> 32)); + roce_set_field(cq_context->cqc_byte_12, + CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, + CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, + ilog2((unsigned int)nent)); + roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, + CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); + cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12); + + cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); + cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l); + + roce_set_field(cq_context->cqc_byte_20, + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, + cpu_to_le32((mtts[0]) >> 32)); + /* Dedicated hardware, directly set 0 */ + roce_set_field(cq_context->cqc_byte_20, + CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, + CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0); + /** + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(cq_context->cqc_byte_20, + CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, + CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, + (u64)tptr_addr >> 44); + cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); + + cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12); + + roce_set_field(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, + CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S, + 0); + /*The initial value of cq's ci is 0 */ + roce_set_field(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, + CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); + cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); +} + +int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + u32 notification_flag; + u32 doorbell[2]; + int ret = 0; + + notification_flag = (flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; + /* + * flags = 0; Notification Flag = 1, next + * flags = 1; Notification Flag = 0, solocited + */ + doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); + roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, + hr_cq->cqn | notification_flag); + + hns_roce_write64_k(doorbell, hr_cq->cq_db_l); + + return ret; +} + +static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, + struct hns_roce_qp **cur_qp, struct ib_wc *wc) +{ + int qpn; + int is_send; + u16 wqe_ctr; + u32 status; + u32 opcode; + struct hns_roce_cqe *cqe; + struct hns_roce_qp *hr_qp; + struct hns_roce_wq *wq; + struct hns_roce_wqe_ctrl_seg *sq_wqe; + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + struct device *dev = &hr_dev->pdev->dev; + + /* Find cqe according consumer index */ + cqe = next_cqe_sw(hr_cq); + if (!cqe) + return -EAGAIN; + + ++hr_cq->cons_index; + /* Memory barrier */ + rmb(); + /* 0->SQ, 1->RQ */ + is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S)); + + /* Local_qpn in UD cqe is always 1, so it needs to compute new qpn */ + if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) <= 1) { + qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M, + CQE_BYTE_20_PORT_NUM_S) + + roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) * + HNS_ROCE_MAX_PORTS; + } else { + qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S); + } + + if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) { + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (unlikely(!hr_qp)) { + dev_err(dev, "CQ %06lx with entry for unknown QPN %06x\n", + hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK)); + return -EINVAL; + } + + *cur_qp = hr_qp; + } + + wc->qp = &(*cur_qp)->ibqp; + wc->vendor_err = 0; + + status = roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, + CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) & + HNS_ROCE_CQE_STATUS_MASK; + switch (status) { + case HNS_ROCE_CQE_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + /* CQE status error, directly return */ + if (wc->status != IB_WC_SUCCESS) + return 0; + + if (is_send) { + /* SQ conrespond to CQE */ + sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_WQE_INDEX_M, + CQE_BYTE_4_WQE_INDEX_S)); + switch (sq_wqe->flag & HNS_ROCE_WQE_OPCODE_MASK) { + case HNS_ROCE_WQE_OPCODE_SEND: + wc->opcode = IB_WC_SEND; + break; + case HNS_ROCE_WQE_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = le32_to_cpu(cqe->byte_cnt); + break; + case HNS_ROCE_WQE_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case HNS_ROCE_WQE_OPCODE_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; + case HNS_ROCE_WQE_OPCODE_UD_SEND: + wc->opcode = IB_WC_SEND; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + wc->wc_flags = (sq_wqe->flag & HNS_ROCE_WQE_IMM ? + IB_WC_WITH_IMM : 0); + + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ + wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_WQE_INDEX_M, + CQE_BYTE_4_WQE_INDEX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else { + /* RQ conrespond to CQE */ + wc->byte_len = le32_to_cpu(cqe->byte_cnt); + opcode = roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_OPERATION_TYPE_M, + CQE_BYTE_4_OPERATION_TYPE_S) & + HNS_ROCE_CQE_OPCODE_MASK; + switch (opcode) { + case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = le32_to_cpu(cqe->immediate_data); + break; + case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE: + if (roce_get_bit(cqe->cqe_byte_4, + CQE_BYTE_4_IMM_INDICATOR_S)) { + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = le32_to_cpu( + cqe->immediate_data); + } else { + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + } + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_SL_M, + CQE_BYTE_20_SL_S); + wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20, + CQE_BYTE_20_REMOTE_QPN_M, + CQE_BYTE_20_REMOTE_QPN_S); + wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20, + CQE_BYTE_20_GRH_PRESENT_S) ? + IB_WC_GRH : 0); + wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28, + CQE_BYTE_28_P_KEY_IDX_M, + CQE_BYTE_28_P_KEY_IDX_S); + } + + return 0; +} + +int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + struct hns_roce_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + int ret = 0; + + spin_lock_irqsave(&hr_cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled); + if (ret) + break; + } + + if (npolled) { + hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index, + &to_hr_dev(ibcq->device)->cq_db_lock); + } + + spin_unlock_irqrestore(&hr_cq->lock, flags); + + if (ret == 0 || ret == -EAGAIN) + return npolled; + else + return ret; +} + +static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, + enum hns_roce_qp_state cur_state, + enum hns_roce_qp_state new_state, + struct hns_roce_qp_context *context, + struct hns_roce_qp *hr_qp) +{ + static const u16 + op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = { + [HNS_ROCE_QP_STATE_RST] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP, + }, + [HNS_ROCE_QP_STATE_INIT] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + /* Note: In v1 engine, HW doesn't support RST2INIT. + * We use RST2INIT cmd instead of INIT2INIT. + */ + [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP, + [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP, + }, + [HNS_ROCE_QP_STATE_RTR] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP, + }, + [HNS_ROCE_QP_STATE_RTS] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP, + [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP, + }, + [HNS_ROCE_QP_STATE_SQD] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP, + [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP, + }, + [HNS_ROCE_QP_STATE_ERR] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + } + }; + + struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = &hr_dev->pdev->dev; + int ret = 0; + + if (cur_state >= HNS_ROCE_QP_NUM_STATE || + new_state >= HNS_ROCE_QP_NUM_STATE || + !op[cur_state][new_state]) { + dev_err(dev, "[modify_qp]not support state %d to %d\n", + cur_state, new_state); + return -EINVAL; + } + + if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP) + return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, + HNS_ROCE_CMD_2RST_QP, + HNS_ROCE_CMD_TIME_CLASS_A); + + if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP) + return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, + HNS_ROCE_CMD_2ERR_QP, + HNS_ROCE_CMD_TIME_CLASS_A); + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, context, sizeof(*context)); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, + op[cur_state][new_state], + HNS_ROCE_CMD_TIME_CLASS_C); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_sqp_context *context; + struct device *dev = &hr_dev->pdev->dev; + dma_addr_t dma_handle = 0; + int rq_pa_start; + u32 reg_val; + u64 *mtts; + u32 *addr; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + /* Search QP buf's MTTs */ + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + hr_qp->mtt.first_seg, &dma_handle); + if (!mtts) { + dev_err(dev, "qp buf pa find failed\n"); + goto out; + } + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + roce_set_field(context->qp1c_bytes_4, + QP1C_BYTES_4_SQ_WQE_SHIFT_M, + QP1C_BYTES_4_SQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(context->qp1c_bytes_4, + QP1C_BYTES_4_RQ_WQE_SHIFT_M, + QP1C_BYTES_4_RQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, + QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); + + context->sq_rq_bt_l = (u32)(dma_handle); + roce_set_field(context->qp1c_bytes_12, + QP1C_BYTES_12_SQ_RQ_BT_H_M, + QP1C_BYTES_12_SQ_RQ_BT_H_S, + ((u32)(dma_handle >> 32))); + + roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, + QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); + roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M, + QP1C_BYTES_16_PORT_NUM_S, hr_qp->port); + roce_set_bit(context->qp1c_bytes_16, + QP1C_BYTES_16_SIGNALING_TYPE_S, + hr_qp->sq_signal_bits); + roce_set_bit(context->qp1c_bytes_16, + QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S, + hr_qp->sq_signal_bits); + roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, + 1); + roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, + 1); + roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S, + 0); + + roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M, + QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head); + roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, + QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); + + rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; + context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + + roce_set_field(context->qp1c_bytes_28, + QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, + QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, + (mtts[rq_pa_start]) >> 32); + roce_set_field(context->qp1c_bytes_28, + QP1C_BYTES_28_RQ_CUR_IDX_M, + QP1C_BYTES_28_RQ_CUR_IDX_S, 0); + + roce_set_field(context->qp1c_bytes_32, + QP1C_BYTES_32_RX_CQ_NUM_M, + QP1C_BYTES_32_RX_CQ_NUM_S, + to_hr_cq(ibqp->recv_cq)->cqn); + roce_set_field(context->qp1c_bytes_32, + QP1C_BYTES_32_TX_CQ_NUM_M, + QP1C_BYTES_32_TX_CQ_NUM_S, + to_hr_cq(ibqp->send_cq)->cqn); + + context->cur_sq_wqe_ba_l = (u32)mtts[0]; + + roce_set_field(context->qp1c_bytes_40, + QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, + QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, + (mtts[0]) >> 32); + roce_set_field(context->qp1c_bytes_40, + QP1C_BYTES_40_SQ_CUR_IDX_M, + QP1C_BYTES_40_SQ_CUR_IDX_S, 0); + + /* Copy context to QP1C register */ + addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG + + hr_qp->port * sizeof(*context)); + + writel(context->qp1c_bytes_4, addr); + writel(context->sq_rq_bt_l, addr + 1); + writel(context->qp1c_bytes_12, addr + 2); + writel(context->qp1c_bytes_16, addr + 3); + writel(context->qp1c_bytes_20, addr + 4); + writel(context->cur_rq_wqe_ba_l, addr + 5); + writel(context->qp1c_bytes_28, addr + 6); + writel(context->qp1c_bytes_32, addr + 7); + writel(context->cur_sq_wqe_ba_l, addr + 8); + } + + /* Modify QP1C status */ + reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + + hr_qp->port * sizeof(*context)); + roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, + ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); + roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + + hr_qp->port * sizeof(*context), reg_val); + + hr_qp->state = new_state; + if (new_state == IB_QPS_RESET) { + hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); + if (ibqp->send_cq != ibqp->recv_cq) + hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), + hr_qp->qpn, NULL); + + hr_qp->rq.head = 0; + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; + hr_qp->sq_next_wqe = 0; + } + + kfree(context); + return 0; + +out: + kfree(context); + return -EINVAL; +} + +static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp_context *context; + struct hns_roce_rq_db rq_db; + dma_addr_t dma_handle_2 = 0; + dma_addr_t dma_handle = 0; + uint32_t doorbell[2] = {0}; + int rq_pa_start = 0; + u32 reg_val = 0; + u64 *mtts_2 = NULL; + int ret = -EINVAL; + u64 *mtts = NULL; + int port; + u8 *dmac; + u8 *smac; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + /* Search qp buf's mtts */ + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + hr_qp->mtt.first_seg, &dma_handle); + if (mtts == NULL) { + dev_err(dev, "qp buf pa find failed\n"); + goto out; + } + + /* Search IRRL's mtts */ + mtts_2 = hns_roce_table_find(&hr_dev->qp_table.irrl_table, hr_qp->qpn, + &dma_handle_2); + if (mtts_2 == NULL) { + dev_err(dev, "qp irrl_table find failed\n"); + goto out; + } + + /* + *Reset to init + * Mandatory param: + * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS + * Optional param: NA + */ + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S, + to_hr_qp_type(hr_qp->ibqp.qp_type)); + + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) + ); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) + ); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_PD_M, + QP_CONTEXT_QPC_BYTES_4_PD_S, + to_hr_pd(ibqp->pd)->pdn); + hr_qp->access_flags = attr->qp_access_flags; + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S, + to_hr_cq(ibqp->send_cq)->cqn); + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S, + to_hr_cq(ibqp->recv_cq)->cqn); + + if (ibqp->srq) + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S, + to_hr_srq(ibqp->srq)->srqn); + + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, + attr->pkey_index); + hr_qp->pkey_index = attr->pkey_index; + roce_set_field(context->qpc_bytes_16, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); + + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S, + to_hr_qp_type(hr_qp->ibqp.qp_type)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0); + if (attr_mask & IB_QP_ACCESS_FLAGS) { + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_READ)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE)); + } else { + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, + !!(hr_qp->access_flags & + IB_ACCESS_REMOTE_READ)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, + !!(hr_qp->access_flags & + IB_ACCESS_REMOTE_WRITE)); + } + + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_PD_M, + QP_CONTEXT_QPC_BYTES_4_PD_S, + to_hr_pd(ibqp->pd)->pdn); + + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S, + to_hr_cq(ibqp->send_cq)->cqn); + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S, + to_hr_cq(ibqp->recv_cq)->cqn); + + if (ibqp->srq) + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S, + to_hr_srq(ibqp->srq)->srqn); + if (attr_mask & IB_QP_PKEY_INDEX) + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, + attr->pkey_index); + else + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, + hr_qp->pkey_index); + + roce_set_field(context->qpc_bytes_16, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_ACCESS_FLAGS) || + (attr_mask & IB_QP_PKEY_INDEX) || + (attr_mask & IB_QP_QKEY)) { + dev_err(dev, "INIT2RTR attr_mask error\n"); + goto out; + } + + dmac = (u8 *)attr->ah_attr.dmac; + + context->sq_rq_bt_l = (u32)(dma_handle); + roce_set_field(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, + QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, + ((u32)(dma_handle >> 32))); + roce_set_bit(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, + 1); + roce_set_field(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, + attr->min_rnr_timer); + context->irrl_ba_l = (u32)(dma_handle_2); + roce_set_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, + QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, + ((u32)(dma_handle_2 >> 32)) & + QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M); + roce_set_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M, + QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0); + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S, + 1); + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, + hr_qp->sq_signal_bits); + + for (port = 0; port < hr_dev->caps.num_ports; port++) { + smac = (u8 *)hr_dev->dev_addr[port]; + dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n", + smac[0], smac[1], smac[2], smac[3], smac[4], + smac[5]); + if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) && + (dmac[2] == smac[2]) && (dmac[3] == smac[3]) && + (dmac[4] == smac[4]) && (dmac[5] == smac[5])) { + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, + 1); + break; + } + } + + if (hr_dev->loop_idc == 0x1) + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); + + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S, + attr->ah_attr.ah_flags); + roce_set_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S, + ilog2((unsigned int)attr->max_dest_rd_atomic)); + + roce_set_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_S, + attr->dest_qp_num); + + /* Configure GID index */ + roce_set_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S, + hns_get_gid_index(hr_dev, + attr->ah_attr.port_num - 1, + attr->ah_attr.grh.sgid_index)); + + memcpy(&(context->dmac_l), dmac, 4); + + roce_set_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_M, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_S, + *((u16 *)(&dmac[4]))); + roce_set_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M, + QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S, + attr->ah_attr.static_rate); + roce_set_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_M, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_S, + attr->ah_attr.grh.hop_limit); + + roce_set_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S, + attr->ah_attr.grh.flow_label); + roce_set_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_TCLASS_M, + QP_CONTEXT_QPC_BYTES_48_TCLASS_S, + attr->ah_attr.grh.traffic_class); + roce_set_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_MTU_M, + QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu); + + memcpy(context->dgid, attr->ah_attr.grh.dgid.raw, + sizeof(attr->ah_attr.grh.dgid.raw)); + + dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l, + roce_get_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_M, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)); + + roce_set_field(context->qpc_bytes_68, + QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M, + QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S, 0); + roce_set_field(context->qpc_bytes_68, + QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); + + rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; + context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + + roce_set_field(context->qpc_bytes_76, + QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, + QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, + mtts[rq_pa_start] >> 32); + roce_set_field(context->qpc_bytes_76, + QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, + QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); + + context->rx_rnr_time = 0; + + roce_set_field(context->qpc_bytes_84, + QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M, + QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S, + attr->rq_psn - 1); + roce_set_field(context->qpc_bytes_84, + QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M, + QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0); + + roce_set_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S, + attr->rq_psn); + roce_set_bit(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0); + roce_set_bit(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0); + roce_set_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S, + 0); + roce_set_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S, + 0); + + context->dma_length = 0; + context->r_key = 0; + context->va_l = 0; + context->va_h = 0; + + roce_set_field(context->qpc_bytes_108, + QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M, + QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0); + roce_set_bit(context->qpc_bytes_108, + QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0); + roce_set_bit(context->qpc_bytes_108, + QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0); + + roce_set_field(context->qpc_bytes_112, + QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M, + QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0); + roce_set_field(context->qpc_bytes_112, + QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M, + QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0); + + /* For chip resp ack */ + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S, + hr_qp->port); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_SL_M, + QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl); + hr_qp->sl = attr->ah_attr.sl; + } else if (cur_state == IB_QPS_RTR && + new_state == IB_QPS_RTS) { + /* If exist optional param, return error */ + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_ACCESS_FLAGS) || + (attr_mask & IB_QP_QKEY) || + (attr_mask & IB_QP_PATH_MIG_STATE) || + (attr_mask & IB_QP_CUR_STATE) || + (attr_mask & IB_QP_MIN_RNR_TIMER)) { + dev_err(dev, "RTR2RTS attr_mask error\n"); + goto out; + } + + context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + + roce_set_field(context->qpc_bytes_120, + QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, + QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, + (mtts[0]) >> 32); + + roce_set_field(context->qpc_bytes_124, + QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, + QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0); + roce_set_field(context->qpc_bytes_124, + QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M, + QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0); + + roce_set_field(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M, + QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S, + attr->sq_psn); + roce_set_bit(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0); + roce_set_field(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M, + QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S, + 0); + roce_set_bit(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0); + + roce_set_field(context->qpc_bytes_132, + QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M, + QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0); + roce_set_field(context->qpc_bytes_132, + QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M, + QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0); + + roce_set_field(context->qpc_bytes_136, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S, + attr->sq_psn); + roce_set_field(context->qpc_bytes_136, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S, + attr->sq_psn); + + roce_set_field(context->qpc_bytes_140, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S, + (attr->sq_psn >> SQ_PSN_SHIFT)); + roce_set_field(context->qpc_bytes_140, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0); + roce_set_bit(context->qpc_bytes_140, + QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0); + + roce_set_field(context->qpc_bytes_144, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, + attr->qp_state); + + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M, + QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0); + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S, 0); + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M, + QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S, 0); + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_LSN_M, + QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100); + + context->rnr_retry = 0; + + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M, + QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S, + attr->retry_cnt); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S, + attr->timeout); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M, + QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S, + attr->rnr_retry); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S, + hr_qp->port); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_SL_M, + QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl); + hr_qp->sl = attr->ah_attr.sl; + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S, + ilog2((unsigned int)attr->max_rd_atomic)); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M, + QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0); + context->pkt_use_len = 0; + + roce_set_field(context->qpc_bytes_164, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn); + roce_set_field(context->qpc_bytes_164, + QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M, + QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0); + + roce_set_field(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M, + QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S, + attr->sq_psn); + roce_set_field(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M, + QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0); + roce_set_field(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M, + QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0); + roce_set_bit(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0); + roce_set_bit(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0); + roce_set_bit(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0); + context->sge_use_len = 0; + + roce_set_field(context->qpc_bytes_176, + QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0); + roce_set_field(context->qpc_bytes_176, + QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S, + 0); + roce_set_field(context->qpc_bytes_180, + QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0); + roce_set_field(context->qpc_bytes_180, + QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, + QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); + + context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + + roce_set_field(context->qpc_bytes_188, + QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, + QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, + (mtts[0]) >> 32); + roce_set_bit(context->qpc_bytes_188, + QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); + roce_set_field(context->qpc_bytes_188, + QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S, + 0); + } else if ((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { + roce_set_field(context->qpc_bytes_144, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, + attr->qp_state); + + } else { + dev_err(dev, "not support this modify\n"); + goto out; + } + + /* Every status migrate must change state */ + roce_set_field(context->qpc_bytes_144, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state); + + /* SW pass context to HW */ + ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, + to_hns_roce_state(cur_state), + to_hns_roce_state(new_state), context, + hr_qp); + if (ret) { + dev_err(dev, "hns_roce_qp_modify failed\n"); + goto out; + } + + /* + * Use rst2init to instead of init2init with drv, + * need to hw to flash RQ HEAD by DB again + */ + if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { + /* Memory barrier */ + wmb(); + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { + /* SW update GSI rq header */ + reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG3_0_REG + + QP1C_CFGN_OFFSET * hr_qp->port); + roce_set_field(reg_val, + ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, + ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, + hr_qp->rq.head); + roce_write(hr_dev, ROCEE_QP1C_CFG3_0_REG + + QP1C_CFGN_OFFSET * hr_qp->port, reg_val); + } else { + rq_db.u32_4 = 0; + rq_db.u32_8 = 0; + + roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M, + RQ_DOORBELL_U32_4_RQ_HEAD_S, + hr_qp->rq.head); + roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M, + RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn); + roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M, + RQ_DOORBELL_U32_8_CMD_S, 1); + roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S, + 1); + + doorbell[0] = rq_db.u32_4; + doorbell[1] = rq_db.u32_8; + + hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + } + } + + hr_qp->state = new_state; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + hr_qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) + hr_qp->port = (attr->port_num - 1); + + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); + if (ibqp->send_cq != ibqp->recv_cq) + hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), + hr_qp->qpn, NULL); + + hr_qp->rq.head = 0; + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; + hr_qp->sq_next_wqe = 0; + } +out: + kfree(context); + return ret; +} + +int hns_roce_v1_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, + new_state); + else + return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state, + new_state); +} + +static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state) +{ + switch (state) { + case HNS_ROCE_QP_STATE_RST: + return IB_QPS_RESET; + case HNS_ROCE_QP_STATE_INIT: + return IB_QPS_INIT; + case HNS_ROCE_QP_STATE_RTR: + return IB_QPS_RTR; + case HNS_ROCE_QP_STATE_RTS: + return IB_QPS_RTS; + case HNS_ROCE_QP_STATE_SQD: + return IB_QPS_SQD; + case HNS_ROCE_QP_STATE_ERR: + return IB_QPS_ERR; + default: + return IB_QPS_ERR; + } +} + +static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_qp_context *hr_context) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, + HNS_ROCE_CMD_QUERY_QP, + HNS_ROCE_CMD_TIME_CLASS_A); + if (!ret) + memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); + else + dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n"); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + +int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp_context *context; + int tmp_qp_state = 0; + int ret = 0; + int state; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + mutex_lock(&hr_qp->mutex); + + if (hr_qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context); + if (ret) { + dev_err(dev, "query qpc error\n"); + ret = -EINVAL; + goto out; + } + + state = roce_get_field(context->qpc_bytes_144, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S); + tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state); + if (tmp_qp_state == -1) { + dev_err(dev, "to_ib_qp_state error\n"); + ret = -EINVAL; + goto out; + } + hr_qp->state = (u8)tmp_qp_state; + qp_attr->qp_state = (enum ib_qp_state)hr_qp->state; + qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_MTU_M, + QP_CONTEXT_QPC_BYTES_48_MTU_S); + qp_attr->path_mig_state = IB_MIG_ARMED; + if (hr_qp->ibqp.qp_type == IB_QPT_UD) + qp_attr->qkey = QKEY_VAL; + + qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S); + qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S); + qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_S); + qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) | + ((roce_get_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) | + ((roce_get_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3); + + if (hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UC) { + qp_attr->ah_attr.sl = roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_SL_M, + QP_CONTEXT_QPC_BYTES_156_SL_S); + qp_attr->ah_attr.grh.flow_label = roce_get_field( + context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S); + qp_attr->ah_attr.grh.sgid_index = roce_get_field( + context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S); + qp_attr->ah_attr.grh.hop_limit = roce_get_field( + context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_M, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_S); + qp_attr->ah_attr.grh.traffic_class = roce_get_field( + context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_TCLASS_M, + QP_CONTEXT_QPC_BYTES_48_TCLASS_S); + + memcpy(qp_attr->ah_attr.grh.dgid.raw, context->dgid, + sizeof(qp_attr->ah_attr.grh.dgid.raw)); + } + + qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S); + qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1; + qp_attr->sq_draining = 0; + qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S); + qp_attr->max_dest_rd_atomic = roce_get_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S); + qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)); + qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)); + qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); + qp_attr->rnr_retry = context->rnr_retry; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + qp_init_attr->cap = qp_attr->cap; + +out: + mutex_unlock(&hr_qp->mutex); + kfree(context); + return ret; +} + +static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + int is_user) +{ + u32 sdbinvcnt; + unsigned long end = 0; + u32 sdbinvcnt_val; + u32 sdbsendptr_val; + u32 sdbisusepr_val; + struct hns_roce_cq *send_cq, *recv_cq; + struct device *dev = &hr_dev->pdev->dev; + + if (hr_qp->ibqp.qp_type == IB_QPT_RC) { + if (hr_qp->state != IB_QPS_RESET) { + /* + * Set qp to ERR, + * waiting for hw complete processing all dbs + */ + if (hns_roce_v1_qp_modify(hr_dev, NULL, + to_hns_roce_state( + (enum ib_qp_state)hr_qp->state), + HNS_ROCE_QP_STATE_ERR, NULL, + hr_qp)) + dev_err(dev, "modify QP %06lx to ERR failed.\n", + hr_qp->qpn); + + /* Record issued doorbell */ + sdbisusepr_val = roce_read(hr_dev, + ROCEE_SDB_ISSUE_PTR_REG); + /* + * Query db process status, + * until hw process completely + */ + end = msecs_to_jiffies( + HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies; + do { + sdbsendptr_val = roce_read(hr_dev, + ROCEE_SDB_SEND_PTR_REG); + if (!time_before(jiffies, end)) { + dev_err(dev, "destroy qp(0x%lx) timeout!!!", + hr_qp->qpn); + break; + } + } while ((short)(roce_get_field(sdbsendptr_val, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) - + roce_get_field(sdbisusepr_val, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) + ) < 0); + + /* Get list pointer */ + sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + + /* Query db's list status, until hw reversal */ + do { + sdbinvcnt_val = roce_read(hr_dev, + ROCEE_SDB_INV_CNT_REG); + if (!time_before(jiffies, end)) { + dev_err(dev, "destroy qp(0x%lx) timeout!!!", + hr_qp->qpn); + dev_err(dev, "SdbInvCnt = 0x%x\n", + sdbinvcnt_val); + break; + } + } while ((short)(roce_get_field(sdbinvcnt_val, + ROCEE_SDB_INV_CNT_SDB_INV_CNT_M, + ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) - + (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0); + + /* Modify qp to reset before destroying qp */ + if (hns_roce_v1_qp_modify(hr_dev, NULL, + to_hns_roce_state( + (enum ib_qp_state)hr_qp->state), + HNS_ROCE_QP_STATE_RST, NULL, hr_qp)) + dev_err(dev, "modify QP %06lx to RESET failed.\n", + hr_qp->qpn); + } + } + + send_cq = to_hr_cq(hr_qp->ibqp.send_cq); + recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); + + hns_roce_lock_cqs(send_cq, recv_cq); + + if (!is_user) { + __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? + to_hr_srq(hr_qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); + } + + hns_roce_qp_remove(hr_dev, hr_qp); + + hns_roce_unlock_cqs(send_cq, recv_cq); + + hns_roce_qp_free(hr_dev, hr_qp); + + /* Not special_QP, free their QPN */ + if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || + (hr_qp->ibqp.qp_type == IB_QPT_UC) || + (hr_qp->ibqp.qp_type == IB_QPT_UD)) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + + hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); + + if (is_user) { + ib_umem_release(hr_qp->umem); + } else { + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + } +} + +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) + kfree(hr_to_hr_sqp(hr_qp)); + else + kfree(hr_qp); + + return 0; +} + +struct hns_roce_v1_priv hr_v1_priv; + +struct hns_roce_hw hns_roce_hw_v1 = { + .reset = hns_roce_v1_reset, + .hw_profile = hns_roce_v1_profile, + .hw_init = hns_roce_v1_init, + .hw_exit = hns_roce_v1_exit, + .set_gid = hns_roce_v1_set_gid, + .set_mac = hns_roce_v1_set_mac, + .set_mtu = hns_roce_v1_set_mtu, + .write_mtpt = hns_roce_v1_write_mtpt, + .write_cqc = hns_roce_v1_write_cqc, + .modify_qp = hns_roce_v1_modify_qp, + .query_qp = hns_roce_v1_query_qp, + .destroy_qp = hns_roce_v1_destroy_qp, + .post_send = hns_roce_v1_post_send, + .post_recv = hns_roce_v1_post_recv, + .req_notify_cq = hns_roce_v1_req_notify_cq, + .poll_cq = hns_roce_v1_poll_cq, + .priv = &hr_v1_priv, +}; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h new file mode 100644 index 00000000000000..2b3bf212570b00 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -0,0 +1,981 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_HW_V1_H +#define _HNS_ROCE_HW_V1_H + +#define CQ_STATE_VALID 2 + +#define HNS_ROCE_V1_MAX_PD_NUM 0x8000 +#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000 +#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000 + +#define HNS_ROCE_V1_MAX_QP_NUM 0x40000 +#define HNS_ROCE_V1_MAX_WQE_NUM 0x4000 + +#define HNS_ROCE_V1_MAX_MTPT_NUM 0x80000 + +#define HNS_ROCE_V1_MAX_MTT_SEGS 0x100000 + +#define HNS_ROCE_V1_MAX_QP_INIT_RDMA 128 +#define HNS_ROCE_V1_MAX_QP_DEST_RDMA 128 + +#define HNS_ROCE_V1_MAX_SQ_DESC_SZ 64 +#define HNS_ROCE_V1_MAX_RQ_DESC_SZ 64 +#define HNS_ROCE_V1_SG_NUM 2 +#define HNS_ROCE_V1_INLINE_SIZE 32 + +#define HNS_ROCE_V1_UAR_NUM 256 +#define HNS_ROCE_V1_PHY_UAR_NUM 8 + +#define HNS_ROCE_V1_GID_NUM 16 + +#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000 +#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400 + +#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256 +#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 +#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64 +#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64 +#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64 + +#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 + +#define HNS_ROCE_V1_EXT_RAQ_WF 8 +#define HNS_ROCE_V1_RAQ_ENTRY 64 +#define HNS_ROCE_V1_RAQ_DEPTH 32768 +#define HNS_ROCE_V1_RAQ_SIZE (HNS_ROCE_V1_RAQ_ENTRY * HNS_ROCE_V1_RAQ_DEPTH) + +#define HNS_ROCE_V1_SDB_DEPTH 0x400 +#define HNS_ROCE_V1_ODB_DEPTH 0x400 + +#define HNS_ROCE_V1_DB_RSVD 0x80 + +#define HNS_ROCE_V1_SDB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_SDB_ALFUL (HNS_ROCE_V1_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD) +#define HNS_ROCE_V1_ODB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_ODB_ALFUL (HNS_ROCE_V1_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) + +#define HNS_ROCE_V1_EXT_SDB_DEPTH 0x4000 +#define HNS_ROCE_V1_EXT_ODB_DEPTH 0x4000 +#define HNS_ROCE_V1_EXT_SDB_ENTRY 16 +#define HNS_ROCE_V1_EXT_ODB_ENTRY 16 +#define HNS_ROCE_V1_EXT_SDB_SIZE \ + (HNS_ROCE_V1_EXT_SDB_DEPTH * HNS_ROCE_V1_EXT_SDB_ENTRY) +#define HNS_ROCE_V1_EXT_ODB_SIZE \ + (HNS_ROCE_V1_EXT_ODB_DEPTH * HNS_ROCE_V1_EXT_ODB_ENTRY) + +#define HNS_ROCE_V1_EXT_SDB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_EXT_SDB_ALFUL \ + (HNS_ROCE_V1_EXT_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD) +#define HNS_ROCE_V1_EXT_ODB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_EXT_ODB_ALFUL \ + (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) + +#define HNS_ROCE_ODB_POLL_MODE 0 + +#define HNS_ROCE_SDB_NORMAL_MODE 0 +#define HNS_ROCE_SDB_EXTEND_MODE 1 + +#define HNS_ROCE_ODB_EXTEND_MODE 1 + +#define KEY_VALID 0x02 + +#define HNS_ROCE_CQE_QPN_MASK 0x3ffff +#define HNS_ROCE_CQE_STATUS_MASK 0x1f +#define HNS_ROCE_CQE_OPCODE_MASK 0xf + +#define HNS_ROCE_CQE_SUCCESS 0x00 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03 +#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04 +#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05 +#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07 +#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08 +#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09 +#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a +#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b +#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c + +#define QP1C_CFGN_OFFSET 0x28 +#define PHY_PORT_OFFSET 0x8 +#define MTPT_IDX_SHIFT 16 +#define ALL_PORT_VAL_OPEN 0x3f +#define POL_TIME_INTERVAL_VAL 0x80 +#define SLEEP_TIME_INTERVAL 20 +#define SQ_PSN_SHIFT 8 +#define QKEY_VAL 0x80010000 +#define SDB_INV_CNT_OFFSET 8 + +struct hns_roce_cq_context { + u32 cqc_byte_4; + u32 cq_bt_l; + u32 cqc_byte_12; + u32 cur_cqe_ba0_l; + u32 cqc_byte_20; + u32 cqe_tptr_addr_l; + u32 cur_cqe_ba1_l; + u32 cqc_byte_32; +}; + +#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0 +#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \ + (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S) + +#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16 +#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \ + (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S) + +#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0 +#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \ + (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S) + +#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20 +#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \ + (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S) + +#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24 +#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S) + +#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0 +#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S) + +#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16 +#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \ + (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S) + +#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8 +#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S) + +#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0 +#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S) + +#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9 + +#define CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8 +#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14 +#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15 + +#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16 +#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \ + (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S) + +struct hns_roce_cqe { + u32 cqe_byte_4; + union { + u32 r_key; + u32 immediate_data; + }; + u32 byte_cnt; + u32 cqe_byte_16; + u32 cqe_byte_20; + u32 s_mac_l; + u32 cqe_byte_28; + u32 reserved; +}; + +#define CQE_BYTE_4_OWNER_S 7 +#define CQE_BYTE_4_SQ_RQ_FLAG_S 14 + +#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8 +#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \ + (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) + +#define CQE_BYTE_4_WQE_INDEX_S 16 +#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S) + +#define CQE_BYTE_4_OPERATION_TYPE_S 0 +#define CQE_BYTE_4_OPERATION_TYPE_M \ + (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S) + +#define CQE_BYTE_4_IMM_INDICATOR_S 15 + +#define CQE_BYTE_16_LOCAL_QPN_S 0 +#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S) + +#define CQE_BYTE_20_PORT_NUM_S 26 +#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S) + +#define CQE_BYTE_20_SL_S 24 +#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S) + +#define CQE_BYTE_20_REMOTE_QPN_S 0 +#define CQE_BYTE_20_REMOTE_QPN_M \ + (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S) + +#define CQE_BYTE_20_GRH_PRESENT_S 29 + +#define CQE_BYTE_28_P_KEY_IDX_S 16 +#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S) + +#define CQ_DB_REQ_NOT_SOL 0 +#define CQ_DB_REQ_NOT (1 << 16) + +struct hns_roce_v1_mpt_entry { + u32 mpt_byte_4; + u32 pbl_addr_l; + u32 mpt_byte_12; + u32 virt_addr_l; + u32 virt_addr_h; + u32 length; + u32 mpt_byte_28; + u32 pa0_l; + u32 mpt_byte_36; + u32 mpt_byte_40; + u32 mpt_byte_44; + u32 mpt_byte_48; + u32 pa4_l; + u32 mpt_byte_56; + u32 mpt_byte_60; + u32 mpt_byte_64; +}; + +#define MPT_BYTE_4_KEY_STATE_S 0 +#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S) + +#define MPT_BYTE_4_KEY_S 8 +#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S) + +#define MPT_BYTE_4_PAGE_SIZE_S 16 +#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S) + +#define MPT_BYTE_4_MW_TYPE_S 20 + +#define MPT_BYTE_4_MW_BIND_ENABLE_S 21 + +#define MPT_BYTE_4_OWN_S 22 + +#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24 +#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \ + (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S) + +#define MPT_BYTE_4_REMOTE_ATOMIC_S 26 +#define MPT_BYTE_4_LOCAL_WRITE_S 27 +#define MPT_BYTE_4_REMOTE_WRITE_S 28 +#define MPT_BYTE_4_REMOTE_READ_S 29 +#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30 +#define MPT_BYTE_4_ADDRESS_TYPE_S 31 + +#define MPT_BYTE_12_PBL_ADDR_H_S 0 +#define MPT_BYTE_12_PBL_ADDR_H_M \ + (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S) + +#define MPT_BYTE_12_MW_BIND_COUNTER_S 17 +#define MPT_BYTE_12_MW_BIND_COUNTER_M \ + (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S) + +#define MPT_BYTE_28_PD_S 0 +#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S) + +#define MPT_BYTE_28_L_KEY_IDX_L_S 16 +#define MPT_BYTE_28_L_KEY_IDX_L_M \ + (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S) + +#define MPT_BYTE_36_PA0_H_S 0 +#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S) + +#define MPT_BYTE_36_PA1_L_S 8 +#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S) + +#define MPT_BYTE_40_PA1_H_S 0 +#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S) + +#define MPT_BYTE_40_PA2_L_S 16 +#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S) + +#define MPT_BYTE_44_PA2_H_S 0 +#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S) + +#define MPT_BYTE_44_PA3_L_S 24 +#define MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S) + +#define MPT_BYTE_48_PA3_H_S 0 +#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S) + +#define MPT_BYTE_56_PA4_H_S 0 +#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S) + +#define MPT_BYTE_56_PA5_L_S 8 +#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S) + +#define MPT_BYTE_60_PA5_H_S 0 +#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S) + +#define MPT_BYTE_60_PA6_L_S 16 +#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S) + +#define MPT_BYTE_64_PA6_H_S 0 +#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S) + +#define MPT_BYTE_64_L_KEY_IDX_H_S 24 +#define MPT_BYTE_64_L_KEY_IDX_H_M \ + (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S) + +struct hns_roce_wqe_ctrl_seg { + __be32 sgl_pa_h; + __be32 flag; + __be32 imm_data; + __be32 msg_length; +}; + +struct hns_roce_wqe_data_seg { + __be64 addr; + __be32 lkey; + __be32 len; +}; + +struct hns_roce_wqe_raddr_seg { + __be32 rkey; + __be32 len;/* reserved */ + __be64 raddr; +}; + +struct hns_roce_rq_wqe_ctrl { + + u32 rwqe_byte_4; + u32 rocee_sgl_ba_l; + u32 rwqe_byte_12; + u32 reserved[5]; +}; + +#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16 +#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \ + (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S) + +#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000 + +#define GID_LEN 16 + +struct hns_roce_ud_send_wqe { + u32 dmac_h; + u32 u32_8; + u32 immediate_data; + + u32 u32_16; + union { + unsigned char dgid[GID_LEN]; + struct { + u32 u32_20; + u32 u32_24; + u32 u32_28; + u32 u32_32; + }; + }; + + u32 u32_36; + u32 u32_40; + + u32 va0_l; + u32 va0_h; + u32 l_key0; + + u32 va1_l; + u32 va1_h; + u32 l_key1; +}; + +#define UD_SEND_WQE_U32_4_DMAC_0_S 0 +#define UD_SEND_WQE_U32_4_DMAC_0_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_0_S) + +#define UD_SEND_WQE_U32_4_DMAC_1_S 8 +#define UD_SEND_WQE_U32_4_DMAC_1_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S) + +#define UD_SEND_WQE_U32_4_DMAC_2_S 16 +#define UD_SEND_WQE_U32_4_DMAC_2_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S) + +#define UD_SEND_WQE_U32_4_DMAC_3_S 24 +#define UD_SEND_WQE_U32_4_DMAC_3_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S) + +#define UD_SEND_WQE_U32_8_DMAC_4_S 0 +#define UD_SEND_WQE_U32_8_DMAC_4_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S) + +#define UD_SEND_WQE_U32_8_DMAC_5_S 8 +#define UD_SEND_WQE_U32_8_DMAC_5_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S) + +#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16 +#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \ + (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S) + +#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24 +#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \ + (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S) + +#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31 + +#define UD_SEND_WQE_U32_16_DEST_QP_S 0 +#define UD_SEND_WQE_U32_16_DEST_QP_M \ + (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S) + +#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24 +#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S) + +#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0 +#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \ + (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S) + +#define UD_SEND_WQE_U32_36_PRIORITY_S 20 +#define UD_SEND_WQE_U32_36_PRIORITY_M \ + (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S) + +#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24 +#define UD_SEND_WQE_U32_36_SGID_INDEX_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S) + +#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0 +#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S) + +#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8 +#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S) + +struct hns_roce_sqp_context { + u32 qp1c_bytes_4; + u32 sq_rq_bt_l; + u32 qp1c_bytes_12; + u32 qp1c_bytes_16; + u32 qp1c_bytes_20; + u32 qp1c_bytes_28; + u32 cur_rq_wqe_ba_l; + u32 qp1c_bytes_32; + u32 cur_sq_wqe_ba_l; + u32 qp1c_bytes_40; +}; + +#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8 +#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S) + +#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12 +#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S) + +#define QP1C_BYTES_4_PD_S 16 +#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S) + +#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0 +#define QP1C_BYTES_12_SQ_RQ_BT_H_M \ + (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S) + +#define QP1C_BYTES_16_RQ_HEAD_S 0 +#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S) + +#define QP1C_BYTES_16_PORT_NUM_S 16 +#define QP1C_BYTES_16_PORT_NUM_M \ + (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S) + +#define QP1C_BYTES_16_SIGNALING_TYPE_S 27 +#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28 +#define QP1C_BYTES_16_RQ_BA_FLG_S 29 +#define QP1C_BYTES_16_SQ_BA_FLG_S 30 +#define QP1C_BYTES_16_QP1_ERR_S 31 + +#define QP1C_BYTES_20_SQ_HEAD_S 0 +#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S) + +#define QP1C_BYTES_20_PKEY_IDX_S 16 +#define QP1C_BYTES_20_PKEY_IDX_M \ + (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S) + +#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0 +#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S) + +#define QP1C_BYTES_28_RQ_CUR_IDX_S 16 +#define QP1C_BYTES_28_RQ_CUR_IDX_M \ + (((1UL << 15) - 1) << QP1C_BYTES_28_RQ_CUR_IDX_S) + +#define QP1C_BYTES_32_TX_CQ_NUM_S 0 +#define QP1C_BYTES_32_TX_CQ_NUM_M \ + (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S) + +#define QP1C_BYTES_32_RX_CQ_NUM_S 16 +#define QP1C_BYTES_32_RX_CQ_NUM_M \ + (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S) + +#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0 +#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S) + +#define QP1C_BYTES_40_SQ_CUR_IDX_S 16 +#define QP1C_BYTES_40_SQ_CUR_IDX_M \ + (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S) + +#define HNS_ROCE_WQE_INLINE (1UL<<31) +#define HNS_ROCE_WQE_SE (1UL<<30) + +#define HNS_ROCE_WQE_SGE_NUM_BIT 24 +#define HNS_ROCE_WQE_IMM (1UL<<23) +#define HNS_ROCE_WQE_FENCE (1UL<<21) +#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20) + +#define HNS_ROCE_WQE_OPCODE_SEND (0<<16) +#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16) +#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16) +#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16) +#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16) +#define HNS_ROCE_WQE_OPCODE_MASK (15<<16) + +struct hns_roce_qp_context { + u32 qpc_bytes_4; + u32 qpc_bytes_8; + u32 qpc_bytes_12; + u32 qpc_bytes_16; + u32 sq_rq_bt_l; + u32 qpc_bytes_24; + u32 irrl_ba_l; + u32 qpc_bytes_32; + u32 qpc_bytes_36; + u32 dmac_l; + u32 qpc_bytes_44; + u32 qpc_bytes_48; + u8 dgid[16]; + u32 qpc_bytes_68; + u32 cur_rq_wqe_ba_l; + u32 qpc_bytes_76; + u32 rx_rnr_time; + u32 qpc_bytes_84; + u32 qpc_bytes_88; + union { + u32 rx_sge_len; + u32 dma_length; + }; + union { + u32 rx_sge_num; + u32 rx_send_pktn; + u32 r_key; + }; + u32 va_l; + u32 va_h; + u32 qpc_bytes_108; + u32 qpc_bytes_112; + u32 rx_cur_sq_wqe_ba_l; + u32 qpc_bytes_120; + u32 qpc_bytes_124; + u32 qpc_bytes_128; + u32 qpc_bytes_132; + u32 qpc_bytes_136; + u32 qpc_bytes_140; + u32 qpc_bytes_144; + u32 qpc_bytes_148; + union { + u32 rnr_retry; + u32 ack_time; + }; + u32 qpc_bytes_156; + u32 pkt_use_len; + u32 qpc_bytes_164; + u32 qpc_bytes_168; + union { + u32 sge_use_len; + u32 pa_use_len; + }; + u32 qpc_bytes_176; + u32 qpc_bytes_180; + u32 tx_cur_sq_wqe_ba_l; + u32 qpc_bytes_188; + u32 rvd21; +}; + +#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0 +#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S) + +#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3 +#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4 +#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5 +#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6 +#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7 + +#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8 +#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S) + +#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12 +#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S) + +#define QP_CONTEXT_QPC_BYTES_4_PD_S 16 +#define QP_CONTEXT_QPC_BYTES_4_PD_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S) + +#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0 +#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S) + +#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16 +#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S) + +#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0 +#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S) + +#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0 +#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S) + +#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0 +#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \ + (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S) + +#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18 +#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S) + +#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23 + +#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \ + (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18 +#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S) + +#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20 +#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21 +#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22 +#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23 + +#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24 +#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S) + +#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0 +#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S) + +#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24 +#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0 +#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S) + +#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16 +#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S) + +#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24 +#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S) + +#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0 +#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \ + (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S) + +#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20 +#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S) + +#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28 +#define QP_CONTEXT_QPC_BYTES_48_MTU_M \ + (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S) + +#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0 +#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8 +#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S) + +#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24 +#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0 +#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S) + +#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24 +#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25 + +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26 +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \ + (((1UL << 2) - 1) << \ + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S) + +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29 +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S) + +#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24 +#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25 + +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24 +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S) + +#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0 +#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S) + +#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16 +#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S) + +#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0 +#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S) + +#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24 + +#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25 +#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S) + +#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27 + +#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24 +#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S) + +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24 +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S) + +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0 +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S) + +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16 +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S) + +#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31 + +#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0 +#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S) + +#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0 +#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S) + +#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2 +#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S) + +#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5 +#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S) + +#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8 +#define QP_CONTEXT_QPC_BYTES_148_LSN_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S) + +#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0 +#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S) + +#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3 +#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S) + +#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8 +#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S) + +#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11 +#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + +#define QP_CONTEXT_QPC_BYTES_156_SL_S 14 +#define QP_CONTEXT_QPC_BYTES_156_SL_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S) + +#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16 +#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S) + +#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24 +#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S) + +#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24 +#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24 +#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S) + +#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26 +#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S) + +#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28 +#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29 +#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30 + +#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0 +#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0 +#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8 + +#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S) + +struct hns_roce_rq_db { + u32 u32_4; + u32 u32_8; +}; + +#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0 +#define RQ_DOORBELL_U32_4_RQ_HEAD_M \ + (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S) + +#define RQ_DOORBELL_U32_8_QPN_S 0 +#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S) + +#define RQ_DOORBELL_U32_8_CMD_S 28 +#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S) + +#define RQ_DOORBELL_U32_8_HW_SYNC_S 31 + +struct hns_roce_sq_db { + u32 u32_4; + u32 u32_8; +}; + +#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0 +#define SQ_DOORBELL_U32_4_SQ_HEAD_M \ + (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S) + +#define SQ_DOORBELL_U32_4_PORT_S 18 +#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S) + +#define SQ_DOORBELL_U32_8_QPN_S 0 +#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S) + +#define SQ_DOORBELL_HW_SYNC_S 31 + +struct hns_roce_ext_db { + int esdb_dep; + int eodb_dep; + struct hns_roce_buf_list *sdb_buf_list; + struct hns_roce_buf_list *odb_buf_list; +}; + +struct hns_roce_db_table { + int sdb_ext_mod; + int odb_ext_mod; + struct hns_roce_ext_db *ext_db; +}; + +struct hns_roce_v1_priv { + struct hns_roce_db_table db_table; + struct hns_roce_raq_table raq_table; +}; + +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c new file mode 100644 index 00000000000000..5b42ec85efa061 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -0,0 +1,1059 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_user.h" +#include "hns_roce_hem.h" + +/** + * hns_roce_addrconf_ifid_eui48 - Get default gid. + * @eui: eui. + * @vlan_id: gid + * @dev: net device + * Description: + * MAC convert to GID + * gid[0..7] = fe80 0000 0000 0000 + * gid[8] = mac[0] ^ 2 + * gid[9] = mac[1] + * gid[10] = mac[2] + * gid[11] = ff (VLAN ID high byte (4 MS bits)) + * gid[12] = fe (VLAN ID low byte) + * gid[13] = mac[3] + * gid[14] = mac[4] + * gid[15] = mac[5] + */ +static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, + struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + +static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid) +{ + memset(gid, 0, sizeof(*gid)); + gid->raw[0] = 0xFE; + gid->raw[1] = 0x80; + hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); +} + +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. + * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + +static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid) +{ + struct device *dev = &hr_dev->pdev->dev; + u8 gid_idx = 0; + + if (gid_index >= hr_dev->caps.gid_table_len[port]) { + dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n", + gid_index, port, hr_dev->caps.gid_table_len[port] - 1); + return -EINVAL; + } + + gid_idx = hns_get_gid_index(hr_dev, port, gid_index); + + if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid))) + return -EINVAL; + + memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid)); + + hr_dev->hw->set_gid(hr_dev, port, gid_index, gid); + + return 0; +} + +static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) +{ + u8 phy_port; + u32 i = 0; + + if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM)) + return; + + for (i = 0; i < MAC_ADDR_OCTET_NUM; i++) + hr_dev->dev_addr[port][i] = addr[i]; + + phy_port = hr_dev->iboe.phy_port[port]; + hr_dev->hw->set_mac(hr_dev, phy_port, addr); +} + +static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu) +{ + u8 phy_port = hr_dev->iboe.phy_port[port]; + enum ib_mtu tmp; + + tmp = iboe_get_mtu(mtu); + if (!tmp) + tmp = IB_MTU_256; + + hr_dev->hw->set_mtu(hr_dev, phy_port, tmp); +} + +static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port) +{ + struct ib_event event; + + /* Refresh gid in ib_cache */ + event.device = &hr_dev->ib_dev; + event.element.port_num = port + 1; + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); +} + +static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, + unsigned long event) +{ + struct device *dev = &hr_dev->pdev->dev; + struct net_device *netdev; + unsigned long flags; + union ib_gid gid; + int ret = 0; + + netdev = hr_dev->iboe.netdevs[port]; + if (!netdev) { + dev_err(dev, "port(%d) can't find netdev\n", port); + return -ENODEV; + } + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + + switch (event) { + case NETDEV_UP: + case NETDEV_CHANGE: + case NETDEV_REGISTER: + case NETDEV_CHANGEADDR: + hns_roce_set_mac(hr_dev, port, netdev->dev_addr); + hns_roce_make_default_gid(netdev, &gid); + ret = hns_roce_set_gid(hr_dev, port, 0, &gid); + if (!ret) + hns_roce_update_gids(hr_dev, port); + break; + case NETDEV_DOWN: + /* + * In v1 engine, only support all ports closed together. + */ + break; + default: + dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event)); + break; + } + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + return ret; +} + +static int hns_roce_netdev_event(struct notifier_block *self, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct hns_roce_ib_iboe *iboe = NULL; + struct hns_roce_dev *hr_dev = NULL; + u8 port = 0; + int ret = 0; + + hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); + iboe = &hr_dev->iboe; + + for (port = 0; port < hr_dev->caps.num_ports; port++) { + if (dev == iboe->netdevs[port]) { + ret = handle_en_event(hr_dev, port, event); + if (ret) + return NOTIFY_DONE; + break; + } + } + + return NOTIFY_DONE; +} + +static void hns_roce_addr_event(int event, struct net_device *event_netdev, + struct hns_roce_dev *hr_dev, union ib_gid *gid) +{ + struct hns_roce_ib_iboe *iboe = NULL; + int gid_table_len = 0; + unsigned long flags; + union ib_gid zgid; + u8 gid_idx = 0; + u8 port = 0; + int i = 0; + int free; + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? + rdma_vlan_dev_real_dev(event_netdev) : + event_netdev; + + if (event != NETDEV_UP && event != NETDEV_DOWN) + return; + + iboe = &hr_dev->iboe; + while (port < hr_dev->caps.num_ports) { + if (real_dev == iboe->netdevs[port]) + break; + port++; + } + + if (port >= hr_dev->caps.num_ports) { + dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n"); + return; + } + + memset(zgid.raw, 0, sizeof(zgid.raw)); + free = -1; + gid_table_len = hr_dev->caps.gid_table_len[port]; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + + for (i = 0; i < gid_table_len; i++) { + gid_idx = hns_get_gid_index(hr_dev, port, i); + if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw, + sizeof(gid->raw))) + break; + if (free < 0 && !memcmp(zgid.raw, + iboe->gid_table[gid_idx].raw, sizeof(zgid.raw))) + free = i; + } + + if (i >= gid_table_len) { + if (free < 0) { + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + dev_dbg(&hr_dev->pdev->dev, + "gid_index overflow, port(%d)\n", port); + return; + } + if (!hns_roce_set_gid(hr_dev, port, free, gid)) + hns_roce_update_gids(hr_dev, port); + } else if (event == NETDEV_DOWN) { + if (!hns_roce_set_gid(hr_dev, port, i, &zgid)) + hns_roce_update_gids(hr_dev, port); + } + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); +} + +static int hns_roce_inet_event(struct notifier_block *self, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct hns_roce_dev *hr_dev; + struct net_device *dev = ifa->ifa_dev->dev; + union ib_gid gid; + + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); + + hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet); + + hns_roce_addr_event(event, dev, hr_dev, &gid); + + return NOTIFY_DONE; +} + +static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev) +{ + struct in_ifaddr *ifa_list = NULL; + union ib_gid gid = {{0} }; + u32 ipaddr = 0; + int index = 0; + int ret = 0; + u8 i = 0; + + for (i = 0; i < hr_dev->caps.num_ports; i++) { + hns_roce_set_mtu(hr_dev, i, + ib_mtu_enum_to_int(hr_dev->caps.max_mtu)); + hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); + + if (hr_dev->iboe.netdevs[i]->ip_ptr) { + ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list; + index = 1; + while (ifa_list) { + ipaddr = ifa_list->ifa_address; + ipv6_addr_set_v4mapped(ipaddr, + (struct in6_addr *)&gid); + ret = hns_roce_set_gid(hr_dev, i, index, &gid); + if (ret) + break; + index++; + ifa_list = ifa_list->ifa_next; + } + hns_roce_update_gids(hr_dev, i); + } + } + + return ret; +} + +static int hns_roce_query_device(struct ib_device *ib_dev, + struct ib_device_attr *props, + struct ib_udata *uhw) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + memset(props, 0, sizeof(*props)); + + props->sys_image_guid = hr_dev->sys_image_guid; + props->max_mr_size = (u64)(~(0ULL)); + props->page_size_cap = hr_dev->caps.page_size_cap; + props->vendor_id = hr_dev->vendor_id; + props->vendor_part_id = hr_dev->vendor_part_id; + props->hw_ver = hr_dev->hw_rev; + props->max_qp = hr_dev->caps.num_qps; + props->max_qp_wr = hr_dev->caps.max_wqes; + props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_LOCAL_DMA_LKEY; + props->max_sge = hr_dev->caps.max_sq_sg; + props->max_sge_rd = 1; + props->max_cq = hr_dev->caps.num_cqs; + props->max_cqe = hr_dev->caps.max_cqes; + props->max_mr = hr_dev->caps.num_mtpts; + props->max_pd = hr_dev->caps.num_pds; + props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; + props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_pkeys = 1; + props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + + return 0; +} + +static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, + struct ib_port_attr *props) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + struct net_device *net_dev; + unsigned long flags; + enum ib_mtu mtu; + u8 port; + + assert(port_num > 0); + port = port_num - 1; + + memset(props, 0, sizeof(*props)); + + props->max_mtu = hr_dev->caps.max_mtu; + props->gid_tbl_len = hr_dev->caps.gid_table_len[port]; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | + IB_PORT_BOOT_MGMT_SUP; + props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN; + props->pkey_tbl_len = 1; + props->active_width = IB_WIDTH_4X; + props->active_speed = 1; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + + net_dev = hr_dev->iboe.netdevs[port]; + if (!net_dev) { + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + dev_err(dev, "find netdev %d failed!\r\n", port); + return -EINVAL; + } + + mtu = iboe_get_mtu(net_dev->mtu); + props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256; + props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3; + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + + return 0; +} + +static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, + u8 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index, + union ib_gid *gid) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + u8 gid_idx = 0; + u8 port; + + if (port_num < 1 || port_num > hr_dev->caps.num_ports || + index >= hr_dev->caps.gid_table_len[port_num - 1]) { + dev_err(dev, + "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n", + port_num, index, hr_dev->caps.num_ports, + hr_dev->caps.gid_table_len[port_num - 1] - 1); + return -EINVAL; + } + + port = port_num - 1; + gid_idx = hns_get_gid_index(hr_dev, port, index); + if (gid_idx >= HNS_ROCE_MAX_GID_NUM) { + dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n", + port_num, index, HNS_ROCE_MAX_GID_NUM); + return -EINVAL; + } + + memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw, + HNS_ROCE_GID_SIZE); + + return 0; +} + +static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index, + u16 *pkey) +{ + *pkey = PKEY_ID; + + return 0; +} + +static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, + struct ib_device_modify *props) +{ + unsigned long flags; + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + spin_lock_irqsave(&to_hr_dev(ib_dev)->sm_lock, flags); + memcpy(ib_dev->node_desc, props->node_desc, NODE_DESC_SIZE); + spin_unlock_irqrestore(&to_hr_dev(ib_dev)->sm_lock, flags); + } + + return 0; +} + +static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, + struct ib_port_modify *props) +{ + return 0; +} + +static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, + struct ib_udata *udata) +{ + int ret = 0; + struct hns_roce_ucontext *context; + struct hns_roce_ib_alloc_ucontext_resp resp; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + resp.qp_tab_size = hr_dev->caps.num_qps; + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + ret = hns_roce_uar_alloc(hr_dev, &context->uar); + if (ret) + goto error_fail_uar_alloc; + + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (ret) + goto error_fail_copy_to_udata; + + return &context->ibucontext; + +error_fail_copy_to_udata: + hns_roce_uar_free(hr_dev, &context->uar); + +error_fail_uar_alloc: + kfree(context); + + return ERR_PTR(ret); +} + +static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); + + hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar); + kfree(context); + + return 0; +} + +static int hns_roce_mmap(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) + return -EINVAL; + + if (vma->vm_pgoff == 0) { + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + to_hr_ucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + } else { + return -EINVAL; + } + + return 0; +} + +static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int ret; + + ret = hns_roce_query_port(ib_dev, port_num, &attr); + if (ret) + return ret; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; + + unregister_inetaddr_notifier(&iboe->nb_inet); + unregister_netdevice_notifier(&iboe->nb); + ib_unregister_device(&hr_dev->ib_dev); +} + +static int hns_roce_register_device(struct hns_roce_dev *hr_dev) +{ + int ret; + struct hns_roce_ib_iboe *iboe = NULL; + struct ib_device *ib_dev = NULL; + struct device *dev = &hr_dev->pdev->dev; + + iboe = &hr_dev->iboe; + + ib_dev = &hr_dev->ib_dev; + strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX); + + ib_dev->owner = THIS_MODULE; + ib_dev->node_type = RDMA_NODE_IB_CA; + ib_dev->dma_device = dev; + + ib_dev->phys_port_cnt = hr_dev->caps.num_ports; + ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; + ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; + ib_dev->uverbs_abi_ver = 1; + ib_dev->uverbs_cmd_mask = + (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ULL << IB_USER_VERBS_CMD_REG_MR) | + (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | + (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | + (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); + + /* HCA||device||port */ + ib_dev->modify_device = hns_roce_modify_device; + ib_dev->query_device = hns_roce_query_device; + ib_dev->query_port = hns_roce_query_port; + ib_dev->modify_port = hns_roce_modify_port; + ib_dev->get_link_layer = hns_roce_get_link_layer; + ib_dev->query_gid = hns_roce_query_gid; + ib_dev->query_pkey = hns_roce_query_pkey; + ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; + ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; + ib_dev->mmap = hns_roce_mmap; + + /* PD */ + ib_dev->alloc_pd = hns_roce_alloc_pd; + ib_dev->dealloc_pd = hns_roce_dealloc_pd; + + /* AH */ + ib_dev->create_ah = hns_roce_create_ah; + ib_dev->query_ah = hns_roce_query_ah; + ib_dev->destroy_ah = hns_roce_destroy_ah; + + /* QP */ + ib_dev->create_qp = hns_roce_create_qp; + ib_dev->modify_qp = hns_roce_modify_qp; + ib_dev->query_qp = hr_dev->hw->query_qp; + ib_dev->destroy_qp = hr_dev->hw->destroy_qp; + ib_dev->post_send = hr_dev->hw->post_send; + ib_dev->post_recv = hr_dev->hw->post_recv; + + /* CQ */ + ib_dev->create_cq = hns_roce_ib_create_cq; + ib_dev->destroy_cq = hns_roce_ib_destroy_cq; + ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq; + ib_dev->poll_cq = hr_dev->hw->poll_cq; + + /* MR */ + ib_dev->get_dma_mr = hns_roce_get_dma_mr; + ib_dev->reg_user_mr = hns_roce_reg_user_mr; + ib_dev->dereg_mr = hns_roce_dereg_mr; + + /* OTHERS */ + ib_dev->get_port_immutable = hns_roce_port_immutable; + + ret = ib_register_device(ib_dev, NULL); + if (ret) { + dev_err(dev, "ib_register_device failed!\n"); + return ret; + } + + ret = hns_roce_setup_mtu_gids(hr_dev); + if (ret) { + dev_err(dev, "roce_setup_mtu_gids failed!\n"); + goto error_failed_setup_mtu_gids; + } + + spin_lock_init(&iboe->lock); + + iboe->nb.notifier_call = hns_roce_netdev_event; + ret = register_netdevice_notifier(&iboe->nb); + if (ret) { + dev_err(dev, "register_netdevice_notifier failed!\n"); + goto error_failed_setup_mtu_gids; + } + + iboe->nb_inet.notifier_call = hns_roce_inet_event; + ret = register_inetaddr_notifier(&iboe->nb_inet); + if (ret) { + dev_err(dev, "register inet addr notifier failed!\n"); + goto error_failed_register_inetaddr_notifier; + } + + return 0; + +error_failed_register_inetaddr_notifier: + unregister_netdevice_notifier(&iboe->nb); + +error_failed_setup_mtu_gids: + ib_unregister_device(ib_dev); + + return ret; +} + +static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) +{ + int i; + u8 phy_port; + int port_cnt = 0; + struct device *dev = &hr_dev->pdev->dev; + struct device_node *np = dev->of_node; + struct device_node *net_node; + struct net_device *netdev = NULL; + struct platform_device *pdev = NULL; + struct resource *res; + + if (of_device_is_compatible(np, "hisilicon,hns-roce-v1")) { + hr_dev->hw = &hns_roce_hw_v1; + } else { + dev_err(dev, "device no compatible!\n"); + return -EINVAL; + } + + res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + hr_dev->reg_base = devm_ioremap_resource(dev, res); + if (!hr_dev->reg_base) + return -ENOMEM; + + for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { + net_node = of_parse_phandle(np, "eth-handle", i); + if (net_node) { + pdev = of_find_device_by_node(net_node); + netdev = platform_get_drvdata(pdev); + phy_port = (u8)i; + if (netdev) { + hr_dev->iboe.netdevs[port_cnt] = netdev; + hr_dev->iboe.phy_port[port_cnt] = phy_port; + } else { + return -ENODEV; + } + port_cnt++; + } + } + + if (port_cnt == 0) { + dev_err(dev, "Unable to get available port by eth-handle!\n"); + return -EINVAL; + } + + hr_dev->caps.num_ports = port_cnt; + + /* Cmd issue mode: 0 is poll, 1 is event */ + hr_dev->cmd_mod = 1; + hr_dev->loop_idc = 0; + + for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) { + hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); + if (hr_dev->irq[i] <= 0) { + dev_err(dev, "Get No.%d irq resource failed!\n", i); + return -EINVAL; + } + + if (of_property_read_string_index(np, "interrupt-names", i, + &hr_dev->irq_names)) + return -EINVAL; + } + + return 0; +} + +static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) +{ + int ret; + struct device *dev = &hr_dev->pdev->dev; + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table, + HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_mtt_segs, 1); + if (ret) { + dev_err(dev, "Failed to init MTT context memory, aborting.\n"); + return ret; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, + HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, + hr_dev->caps.num_mtpts, 1); + if (ret) { + dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); + goto err_unmap_mtt; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, + HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz, + hr_dev->caps.num_qps, 1); + if (ret) { + dev_err(dev, "Failed to init QP context memory, aborting.\n"); + goto err_unmap_dmpt; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.irrl_table, + HEM_TYPE_IRRL, + hr_dev->caps.irrl_entry_sz * + hr_dev->caps.max_qp_init_rdma, + hr_dev->caps.num_qps, 1); + if (ret) { + dev_err(dev, "Failed to init irrl_table memory, aborting.\n"); + goto err_unmap_qp; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table, + HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz, + hr_dev->caps.num_cqs, 1); + if (ret) { + dev_err(dev, "Failed to init CQ context memory, aborting.\n"); + goto err_unmap_irrl; + } + + return 0; + +err_unmap_irrl: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); + +err_unmap_qp: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); + +err_unmap_dmpt: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); + +err_unmap_mtt: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); + + return ret; +} + +/** +* hns_roce_setup_hca - setup host channel adapter +* @hr_dev: pointer to hns roce device +* Return : int +*/ +static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) +{ + int ret; + struct device *dev = &hr_dev->pdev->dev; + + spin_lock_init(&hr_dev->sm_lock); + spin_lock_init(&hr_dev->cq_db_lock); + spin_lock_init(&hr_dev->bt_cmd_lock); + + ret = hns_roce_init_uar_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to initialize uar table. aborting\n"); + return ret; + } + + ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar); + if (ret) { + dev_err(dev, "Failed to allocate priv_uar.\n"); + goto err_uar_table_free; + } + + ret = hns_roce_init_pd_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init protected domain table.\n"); + goto err_uar_alloc_free; + } + + ret = hns_roce_init_mr_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init memory region table.\n"); + goto err_pd_table_free; + } + + ret = hns_roce_init_cq_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init completion queue table.\n"); + goto err_mr_table_free; + } + + ret = hns_roce_init_qp_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init queue pair table.\n"); + goto err_cq_table_free; + } + + return 0; + +err_cq_table_free: + hns_roce_cleanup_cq_table(hr_dev); + +err_mr_table_free: + hns_roce_cleanup_mr_table(hr_dev); + +err_pd_table_free: + hns_roce_cleanup_pd_table(hr_dev); + +err_uar_alloc_free: + hns_roce_uar_free(hr_dev, &hr_dev->priv_uar); + +err_uar_table_free: + hns_roce_cleanup_uar_table(hr_dev); + return ret; +} + +/** +* hns_roce_probe - RoCE driver entrance +* @pdev: pointer to platform device +* Return : int +* +*/ +static int hns_roce_probe(struct platform_device *pdev) +{ + int ret; + struct hns_roce_dev *hr_dev; + struct device *dev = &pdev->dev; + + hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + if (!hr_dev) + return -ENOMEM; + + memset((u8 *)hr_dev + sizeof(struct ib_device), 0, + sizeof(struct hns_roce_dev) - sizeof(struct ib_device)); + + hr_dev->pdev = pdev; + platform_set_drvdata(pdev, hr_dev); + + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) { + dev_err(dev, "No usable DMA addressing mode\n"); + ret = -EIO; + goto error_failed_get_cfg; + } + + ret = hns_roce_get_cfg(hr_dev); + if (ret) { + dev_err(dev, "Get Configuration failed!\n"); + goto error_failed_get_cfg; + } + + ret = hr_dev->hw->reset(hr_dev, true); + if (ret) { + dev_err(dev, "Reset RoCE engine failed!\n"); + goto error_failed_get_cfg; + } + + hr_dev->hw->hw_profile(hr_dev); + + ret = hns_roce_cmd_init(hr_dev); + if (ret) { + dev_err(dev, "cmd init failed!\n"); + goto error_failed_cmd_init; + } + + ret = hns_roce_init_eq_table(hr_dev); + if (ret) { + dev_err(dev, "eq init failed!\n"); + goto error_failed_eq_table; + } + + if (hr_dev->cmd_mod) { + ret = hns_roce_cmd_use_events(hr_dev); + if (ret) { + dev_err(dev, "Switch to event-driven cmd failed!\n"); + goto error_failed_use_event; + } + } + + ret = hns_roce_init_hem(hr_dev); + if (ret) { + dev_err(dev, "init HEM(Hardware Entry Memory) failed!\n"); + goto error_failed_init_hem; + } + + ret = hns_roce_setup_hca(hr_dev); + if (ret) { + dev_err(dev, "setup hca failed!\n"); + goto error_failed_setup_hca; + } + + ret = hr_dev->hw->hw_init(hr_dev); + if (ret) { + dev_err(dev, "hw_init failed!\n"); + goto error_failed_engine_init; + } + + ret = hns_roce_register_device(hr_dev); + if (ret) + goto error_failed_register_device; + + return 0; + +error_failed_register_device: + hr_dev->hw->hw_exit(hr_dev); + +error_failed_engine_init: + hns_roce_cleanup_bitmap(hr_dev); + +error_failed_setup_hca: + hns_roce_cleanup_hem(hr_dev); + +error_failed_init_hem: + if (hr_dev->cmd_mod) + hns_roce_cmd_use_polling(hr_dev); + +error_failed_use_event: + hns_roce_cleanup_eq_table(hr_dev); + +error_failed_eq_table: + hns_roce_cmd_cleanup(hr_dev); + +error_failed_cmd_init: + ret = hr_dev->hw->reset(hr_dev, false); + if (ret) + dev_err(&hr_dev->pdev->dev, "roce_engine reset fail\n"); + +error_failed_get_cfg: + ib_dealloc_device(&hr_dev->ib_dev); + + return ret; +} + +/** +* hns_roce_remove - remove RoCE device +* @pdev: pointer to platform device +*/ +static int hns_roce_remove(struct platform_device *pdev) +{ + struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); + + hns_roce_unregister_device(hr_dev); + hr_dev->hw->hw_exit(hr_dev); + hns_roce_cleanup_bitmap(hr_dev); + hns_roce_cleanup_hem(hr_dev); + + if (hr_dev->cmd_mod) + hns_roce_cmd_use_polling(hr_dev); + + hns_roce_cleanup_eq_table(hr_dev); + hns_roce_cmd_cleanup(hr_dev); + hr_dev->hw->reset(hr_dev, false); + + ib_dealloc_device(&hr_dev->ib_dev); + + return 0; +} + +static const struct of_device_id hns_roce_of_match[] = { + { .compatible = "hisilicon,hns-roce-v1",}, + {}, +}; +MODULE_DEVICE_TABLE(of, hns_roce_of_match); + +static struct platform_driver hns_roce_driver = { + .probe = hns_roce_probe, + .remove = hns_roce_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = hns_roce_of_match, + }, +}; + +module_platform_driver(hns_roce_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Wei Hu "); +MODULE_AUTHOR("Nenglong Zhao "); +MODULE_AUTHOR("Lijun Ou "); +MODULE_DESCRIPTION("HNS RoCE Driver"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c new file mode 100644 index 00000000000000..59f5e2be046b32 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -0,0 +1,614 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" + +static u32 hw_index_to_key(unsigned long ind) +{ + return (u32)(ind >> 24) | (ind << 8); +} + +static unsigned long key_to_hw_index(u32 key) +{ + return (key << 24) | (key >> 8); +} + +static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) +{ + return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, + HNS_ROCE_CMD_SW2HW_MPT, + HNS_ROCE_CMD_TIME_CLASS_B); +} + +static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) +{ + return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, + mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, + HNS_ROCE_CMD_TIME_CLASS_B); +} + +static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, + unsigned long *seg) +{ + int o; + u32 m; + + spin_lock(&buddy->lock); + + for (o = order; o <= buddy->max_order; ++o) { + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + *seg = find_first_bit(buddy->bits[o], m); + if (*seg < m) + goto found; + } + } + spin_unlock(&buddy->lock); + return -1; + + found: + clear_bit(*seg, buddy->bits[o]); + --buddy->num_free[o]; + + while (o > order) { + --o; + *seg <<= 1; + set_bit(*seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; + } + + spin_unlock(&buddy->lock); + + *seg <<= order; + return 0; +} + +static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg, + int order) +{ + seg >>= order; + + spin_lock(&buddy->lock); + + while (test_bit(seg ^ 1, buddy->bits[order])) { + clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + + set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; + + spin_unlock(&buddy->lock); +} + +static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) +{ + int i, s; + + buddy->max_order = max_order; + spin_lock_init(&buddy->lock); + + buddy->bits = kzalloc((buddy->max_order + 1) * sizeof(long *), + GFP_KERNEL); + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof(int *), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) + goto err_out; + + for (i = 0; i <= buddy->max_order; ++i) { + s = BITS_TO_LONGS(1 << (buddy->max_order - i)); + buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL); + if (!buddy->bits[i]) + goto err_out_free; + + bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); + } + + set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free: + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + +err_out: + kfree(buddy->bits); + kfree(buddy->num_free); + return -ENOMEM; +} + +static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) +{ + int i; + + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); + kfree(buddy->num_free); +} + +static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, + unsigned long *seg) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + int ret = 0; + + ret = hns_roce_buddy_alloc(&mr_table->mtt_buddy, order, seg); + if (ret == -1) + return -1; + + if (hns_roce_table_get_range(hr_dev, &mr_table->mtt_table, *seg, + *seg + (1 << order) - 1)) { + hns_roce_buddy_free(&mr_table->mtt_buddy, *seg, order); + return -1; + } + + return 0; +} + +int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, + struct hns_roce_mtt *mtt) +{ + int ret = 0; + int i; + + /* Page num is zero, correspond to DMA memory register */ + if (!npages) { + mtt->order = -1; + mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT; + return 0; + } + + /* Note: if page_shift is zero, FAST memory regsiter */ + mtt->page_shift = page_shift; + + /* Compute MTT entry necessary */ + for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages; + i <<= 1) + ++mtt->order; + + /* Allocate MTT entry */ + ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg); + if (ret == -1) + return -ENOMEM; + + return 0; +} + +void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + + if (mtt->order < 0) + return; + + hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); +} + +static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, + u64 size, u32 access, int npages, + struct hns_roce_mr *mr) +{ + unsigned long index = 0; + int ret = 0; + struct device *dev = &hr_dev->pdev->dev; + + /* Allocate a key for mr from mr_table */ + ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); + if (ret == -1) + return -ENOMEM; + + mr->iova = iova; /* MR va starting addr */ + mr->size = size; /* MR addr range */ + mr->pd = pd; /* MR num */ + mr->access = access; /* MR access permit */ + mr->enabled = 0; /* MR active status */ + mr->key = hw_index_to_key(index); /* MR key */ + + if (size == ~0ull) { + mr->type = MR_TYPE_DMA; + mr->pbl_buf = NULL; + mr->pbl_dma_addr = 0; + } else { + mr->type = MR_TYPE_MR; + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + } + + return 0; +} + +static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) +{ + struct device *dev = &hr_dev->pdev->dev; + int npages = 0; + int ret; + + if (mr->enabled) { + ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) + & (hr_dev->caps.num_mtpts - 1)); + if (ret) + dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + } + + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf, + mr->pbl_dma_addr); + } + + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, + key_to_hw_index(mr->key)); +} + +static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) +{ + int ret; + unsigned long mtpt_idx = key_to_hw_index(mr->key); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + + /* Prepare HEM entry memory */ + ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); + if (ret) + return ret; + + /* Allocate mailbox memory */ + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto err_table; + } + + ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); + if (ret) { + dev_err(dev, "Write mtpt fail!\n"); + goto err_page; + } + + ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + if (ret) { + dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + goto err_page; + } + + mr->enabled = 1; + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return 0; + +err_page: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + +err_table: + hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); + return ret; +} + +static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, u32 start_index, + u32 npages, u64 *page_list) +{ + u32 i = 0; + __le64 *mtts = NULL; + dma_addr_t dma_handle; + u32 s = start_index * sizeof(u64); + + /* All MTTs must fit in the same page */ + if (start_index / (PAGE_SIZE / sizeof(u64)) != + (start_index + npages - 1) / (PAGE_SIZE / sizeof(u64))) + return -EINVAL; + + if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) + return -EINVAL; + + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, + &dma_handle); + if (!mtts) + return -ENOMEM; + + /* Save page addr, low 12 bits : 0 */ + for (i = 0; i < npages; ++i) + mtts[i] = (cpu_to_le64(page_list[i])) >> PAGE_ADDR_SHIFT; + + return 0; +} + +static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, u32 start_index, + u32 npages, u64 *page_list) +{ + int chunk; + int ret; + + if (mtt->order < 0) + return -EINVAL; + + while (npages > 0) { + chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages); + + ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, + page_list); + if (ret) + return ret; + + npages -= chunk; + start_index += chunk; + page_list += chunk; + } + + return 0; +} + +int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) +{ + u32 i = 0; + int ret = 0; + u64 *page_list = NULL; + + page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + for (i = 0; i < buf->npages; ++i) { + if (buf->nbufs == 1) + page_list[i] = buf->direct.map + (i << buf->page_shift); + else + page_list[i] = buf->page_list[i].map; + + } + ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); + + kfree(page_list); + + return ret; +} + +int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + int ret = 0; + + ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap, + hr_dev->caps.num_mtpts, + hr_dev->caps.num_mtpts - 1, + hr_dev->caps.reserved_mrws, 0); + if (ret) + return ret; + + ret = hns_roce_buddy_init(&mr_table->mtt_buddy, + ilog2(hr_dev->caps.num_mtt_segs)); + if (ret) + goto err_buddy; + + return 0; + +err_buddy: + hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); + return ret; +} + +void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + + hns_roce_buddy_cleanup(&mr_table->mtt_buddy); + hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); +} + +struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) +{ + int ret = 0; + struct hns_roce_mr *mr = NULL; + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (mr == NULL) + return ERR_PTR(-ENOMEM); + + /* Allocate memory region key */ + ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, + ~0ULL, acc, 0, mr); + if (ret) + goto err_free; + + ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + if (ret) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + mr->umem = NULL; + + return &mr->ibmr; + +err_mr: + hns_roce_mr_free(to_hr_dev(pd->device), mr); + +err_free: + kfree(mr); + return ERR_PTR(ret); +} + +int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct ib_umem *umem) +{ + struct scatterlist *sg; + int i, k, entry; + int ret = 0; + u64 *pages; + u32 n; + int len; + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = n = 0; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + umem->page_size * k; + if (i == PAGE_SIZE / sizeof(u64)) { + ret = hns_roce_write_mtt(hr_dev, mtt, n, i, + pages); + if (ret) + goto out; + n += i; + i = 0; + } + } + } + + if (i) + ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); + +out: + free_page((unsigned long) pages); + return ret; +} + +static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr, + struct ib_umem *umem) +{ + int i = 0; + int entry; + struct scatterlist *sg; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; + i++; + } + + /* Memory barrier */ + mb(); + + return 0; +} + +struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_mr *mr = NULL; + int ret = 0; + int n = 0; + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + if (IS_ERR(mr->umem)) { + ret = PTR_ERR(mr->umem); + goto err_free; + } + + n = ib_umem_page_count(mr->umem); + if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) { + dev_err(dev, "Just support 4K page size but is 0x%x now!\n", + mr->umem->page_size); + } + + if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { + dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n", + length); + goto err_umem; + } + + ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, + access_flags, n, mr); + if (ret) + goto err_umem; + + ret = hns_roce_ib_umem_write_mr(mr, mr->umem); + if (ret) + goto err_mr; + + ret = hns_roce_mr_enable(hr_dev, mr); + if (ret) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + + return &mr->ibmr; + +err_mr: + hns_roce_mr_free(hr_dev, mr); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + return ERR_PTR(ret); +} + +int hns_roce_dereg_mr(struct ib_mr *ibmr) +{ + struct hns_roce_mr *mr = to_hr_mr(ibmr); + + hns_roce_mr_free(to_hr_dev(ibmr->device), mr); + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return 0; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c new file mode 100644 index 00000000000000..16271b5bd1701d --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "hns_roce_device.h" + +static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) +{ + struct device *dev = &hr_dev->pdev->dev; + unsigned long pd_number; + int ret = 0; + + ret = hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, &pd_number); + if (ret == -1) { + dev_err(dev, "alloc pdn from pdbitmap failed\n"); + return -ENOMEM; + } + + *pdn = pd_number; + + return 0; +} + +static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) +{ + hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn); +} + +int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev) +{ + return hns_roce_bitmap_init(&hr_dev->pd_bitmap, hr_dev->caps.num_pds, + hr_dev->caps.num_pds - 1, + hr_dev->caps.reserved_pds, 0); +} + +void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap); +} + +struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_pd *pd; + int ret; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); + if (ret) { + kfree(pd); + dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n"); + return ERR_PTR(ret); + } + + if (context) { + if (ib_copy_to_udata(udata, &pd->pdn, sizeof(u64))) { + hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); + dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n"); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + + return &pd->ibpd; +} + +int hns_roce_dealloc_pd(struct ib_pd *pd) +{ + hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); + kfree(to_hr_pd(pd)); + + return 0; +} + +int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) +{ + struct resource *res; + int ret = 0; + + /* Using bitmap to manager UAR index */ + ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->index); + if (ret == -1) + return -ENOMEM; + + uar->index = (uar->index - 1) % hr_dev->caps.phy_num_uars + 1; + + res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index; + + return 0; +} + +void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) +{ + hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index); +} + +int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev) +{ + return hns_roce_bitmap_init(&hr_dev->uar_table.bitmap, + hr_dev->caps.num_uars, + hr_dev->caps.num_uars - 1, + hr_dev->caps.reserved_uars, 0); +} + +void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c new file mode 100644 index 00000000000000..645c18d809a5de --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -0,0 +1,855 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_hem.h" +#include "hns_roce_user.h" + +#define DB_REG_OFFSET 0x1000 +#define SQP_NUM 12 + +void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp *qp; + + spin_lock(&qp_table->lock); + + qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (qp) + atomic_inc(&qp->refcount); + + spin_unlock(&qp_table->lock); + + if (!qp) { + dev_warn(dev, "Async event for bogus QP %08x\n", qpn); + return; + } + + qp->event(qp, (enum hns_roce_event)event_type); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + +static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, + enum hns_roce_event type) +{ + struct ib_event event; + struct ib_qp *ibqp = &hr_qp->ibqp; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case HNS_ROCE_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06lx\n", + type, hr_qp->qpn); + return; + } + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, + int align, unsigned long *base) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + int ret = 0; + unsigned long qpn; + + ret = hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, &qpn); + if (ret == -1) + return -ENOMEM; + + *base = qpn; + + return 0; +} + +enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: + return HNS_ROCE_QP_STATE_RST; + case IB_QPS_INIT: + return HNS_ROCE_QP_STATE_INIT; + case IB_QPS_RTR: + return HNS_ROCE_QP_STATE_RTR; + case IB_QPS_RTS: + return HNS_ROCE_QP_STATE_RTS; + case IB_QPS_SQD: + return HNS_ROCE_QP_STATE_SQD; + case IB_QPS_ERR: + return HNS_ROCE_QP_STATE_ERR; + default: + return HNS_ROCE_QP_NUM_STATE; + } +} + +static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + int ret; + + if (!qpn) + return -EINVAL; + + hr_qp->qpn = qpn; + + spin_lock_irq(&qp_table->lock); + ret = radix_tree_insert(&hr_dev->qp_table_tree, + hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); + spin_unlock_irq(&qp_table->lock); + if (ret) { + dev_err(&hr_dev->pdev->dev, "QPC radix_tree_insert failed\n"); + goto err_put_irrl; + } + + atomic_set(&hr_qp->refcount, 1); + init_completion(&hr_qp->free); + + return 0; + +err_put_irrl: + + return ret; +} + +static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct device *dev = &hr_dev->pdev->dev; + int ret; + + if (!qpn) + return -EINVAL; + + hr_qp->qpn = qpn; + + /* Alloc memory for QPC */ + ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn); + if (ret) { + dev_err(dev, "QPC table get failed\n"); + goto err_out; + } + + /* Alloc memory for IRRL */ + ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn); + if (ret) { + dev_err(dev, "IRRL table get failed\n"); + goto err_put_qp; + } + + spin_lock_irq(&qp_table->lock); + ret = radix_tree_insert(&hr_dev->qp_table_tree, + hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); + spin_unlock_irq(&qp_table->lock); + if (ret) { + dev_err(dev, "QPC radix_tree_insert failed\n"); + goto err_put_irrl; + } + + atomic_set(&hr_qp->refcount, 1); + init_completion(&hr_qp->free); + + return 0; + +err_put_irrl: + hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); + +err_put_qp: + hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); + +err_out: + return ret; +} + +void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + unsigned long flags; + + spin_lock_irqsave(&qp_table->lock, flags); + radix_tree_delete(&hr_dev->qp_table_tree, + hr_qp->qpn & (hr_dev->caps.num_qps - 1)); + spin_unlock_irqrestore(&qp_table->lock, flags); +} + +void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + + if (atomic_dec_and_test(&hr_qp->refcount)) + complete(&hr_qp->free); + wait_for_completion(&hr_qp->free); + + if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { + hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); + hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); + } +} + +void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, + int cnt) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + + if (base_qpn < (hr_dev->caps.sqp_start + 2 * hr_dev->caps.num_ports)) + return; + + hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); +} + +static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, int is_user, int has_srq, + struct hns_roce_qp *hr_qp) +{ + u32 max_cnt; + struct device *dev = &hr_dev->pdev->dev; + + /* Check the validity of QP support capacity */ + if (cap->max_recv_wr > hr_dev->caps.max_wqes || + cap->max_recv_sge > hr_dev->caps.max_rq_sg) { + dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n", + cap->max_recv_wr, cap->max_recv_sge); + return -EINVAL; + } + + /* If srq exit, set zero for relative number of rq */ + if (has_srq) { + if (cap->max_recv_wr) { + dev_dbg(dev, "srq no need config max_recv_wr\n"); + return -EINVAL; + } + + hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0; + } else { + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { + dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n"); + return -EINVAL; + } + + /* In v1 engine, parameter verification procession */ + max_cnt = cap->max_recv_wr > HNS_ROCE_MIN_WQE_NUM ? + cap->max_recv_wr : HNS_ROCE_MIN_WQE_NUM; + hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); + + if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "hns_roce_set_rq_size rq.wqe_cnt too large\n"); + return -EINVAL; + } + + max_cnt = max(1U, cap->max_recv_sge); + hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + /* WQE is fixed for 64B */ + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); + } + + cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_sge = hr_qp->rq.max_gs; + + return 0; +} + +static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_ib_create_qp *ucmd) +{ + u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); + u8 max_sq_stride = ilog2(roundup_sq_stride); + + /* Sanity check SQ size before proceeding */ + if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || + ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + dev_err(&hr_dev->pdev->dev, "check SQ size error!\n"); + return -EINVAL; + } + + hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; + + /* Get buf size, SQ and RQ are aligned to page_szie */ + hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->rq.wqe_shift), PAGE_SIZE) + + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + hr_qp->sq.offset = 0; + hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + return 0; +} + +static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, + enum ib_qp_type type, + struct hns_roce_qp *hr_qp) +{ + struct device *dev = &hr_dev->pdev->dev; + u32 max_cnt; + (void)type; + + if (cap->max_send_wr > hr_dev->caps.max_wqes || + cap->max_send_sge > hr_dev->caps.max_sq_sg || + cap->max_inline_data > hr_dev->caps.max_sq_inline) { + dev_err(dev, "hns_roce_set_kernel_sq_size error1\n"); + return -EINVAL; + } + + hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); + hr_qp->sq_max_wqes_per_wr = 1; + hr_qp->sq_spare_wqes = 0; + + /* In v1 engine, parameter verification procession */ + max_cnt = cap->max_send_wr > HNS_ROCE_MIN_WQE_NUM ? + cap->max_send_wr : HNS_ROCE_MIN_WQE_NUM; + hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); + if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "hns_roce_set_kernel_sq_size sq.wqe_cnt too large\n"); + return -EINVAL; + } + + /* Get data_seg numbers */ + max_cnt = max(1U, cap->max_send_sge); + hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); + + /* Get buf size, SQ and RQ are aligned to page_szie */ + hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->rq.wqe_shift), PAGE_SIZE) + + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + hr_qp->sq.offset = 0; + hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + /* Get wr and sge number which send */ + cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_sge = hr_qp->sq.max_gs; + + /* We don't support inline sends for kernel QPs (yet) */ + cap->max_inline_data = 0; + + return 0; +} + +static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, + struct ib_pd *ib_pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, unsigned long sqpn, + struct hns_roce_qp *hr_qp) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_ib_create_qp ucmd; + unsigned long qpn = 0; + int ret = 0; + + mutex_init(&hr_qp->mutex); + spin_lock_init(&hr_qp->sq.lock); + spin_lock_init(&hr_qp->rq.lock); + + hr_qp->state = IB_QPS_RESET; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; + else + hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; + + ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject, + !!init_attr->srq, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_set_rq_size failed\n"); + goto err_out; + } + + if (ib_pd->uobject) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + dev_err(dev, "ib_copy_from_udata error for create qp\n"); + ret = -EFAULT; + goto err_out; + } + + ret = hns_roce_set_user_sq_size(hr_dev, hr_qp, &ucmd); + if (ret) { + dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n"); + goto err_out; + } + + hr_qp->umem = ib_umem_get(ib_pd->uobject->context, + ucmd.buf_addr, hr_qp->buff_size, 0, + 0); + if (IS_ERR(hr_qp->umem)) { + dev_err(dev, "ib_umem_get error for create qp\n"); + ret = PTR_ERR(hr_qp->umem); + goto err_out; + } + + ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem), + ilog2((unsigned int)hr_qp->umem->page_size), + &hr_qp->mtt); + if (ret) { + dev_err(dev, "hns_roce_mtt_init error for create qp\n"); + goto err_buf; + } + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt, + hr_qp->umem); + if (ret) { + dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n"); + goto err_mtt; + } + } else { + if (init_attr->create_flags & + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + dev_err(dev, "init_attr->create_flags error!\n"); + ret = -EINVAL; + goto err_out; + } + + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { + dev_err(dev, "init_attr->create_flags error!\n"); + ret = -EINVAL; + goto err_out; + } + + /* Set SQ size */ + ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap, + init_attr->qp_type, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_set_kernel_sq_size error!\n"); + goto err_out; + } + + /* QP doorbell register address */ + hr_qp->sq.db_reg_l = hr_dev->reg_base + ROCEE_DB_SQ_L_0_REG + + DB_REG_OFFSET * hr_dev->priv_uar.index; + hr_qp->rq.db_reg_l = hr_dev->reg_base + + ROCEE_DB_OTHERS_L_0_REG + + DB_REG_OFFSET * hr_dev->priv_uar.index; + + /* Allocate QP buf */ + if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, PAGE_SIZE * 2, + &hr_qp->hr_buf)) { + dev_err(dev, "hns_roce_buf_alloc error!\n"); + ret = -ENOMEM; + goto err_out; + } + + /* Write MTT */ + ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages, + hr_qp->hr_buf.page_shift, &hr_qp->mtt); + if (ret) { + dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n"); + goto err_buf; + } + + ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt, + &hr_qp->hr_buf); + if (ret) { + dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n"); + goto err_mtt; + } + + hr_qp->sq.wrid = kmalloc_array(hr_qp->sq.wqe_cnt, sizeof(u64), + GFP_KERNEL); + hr_qp->rq.wrid = kmalloc_array(hr_qp->rq.wqe_cnt, sizeof(u64), + GFP_KERNEL); + if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) { + ret = -ENOMEM; + goto err_wrid; + } + } + + if (sqpn) { + qpn = sqpn; + } else { + /* Get QPN */ + ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn); + if (ret) { + dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n"); + goto err_wrid; + } + } + + if ((init_attr->qp_type) == IB_QPT_GSI) { + ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_qp_alloc failed!\n"); + goto err_qpn; + } + } else { + ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_qp_alloc failed!\n"); + goto err_qpn; + } + } + + if (sqpn) + hr_qp->doorbell_qpn = 1; + else + hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); + + hr_qp->event = hns_roce_ib_qp_event; + + return 0; + +err_qpn: + if (!sqpn) + hns_roce_release_range_qp(hr_dev, qpn, 1); + +err_wrid: + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); + +err_buf: + if (ib_pd->uobject) + ib_umem_release(hr_qp->umem); + else + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + +err_out: + return ret; +} + +struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_sqp *hr_sqp; + struct hns_roce_qp *hr_qp; + int ret; + + switch (init_attr->qp_type) { + case IB_QPT_RC: { + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) + return ERR_PTR(-ENOMEM); + + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, + hr_qp); + if (ret) { + dev_err(dev, "Create RC QP failed\n"); + kfree(hr_qp); + return ERR_PTR(ret); + } + + hr_qp->ibqp.qp_num = hr_qp->qpn; + + break; + } + case IB_QPT_GSI: { + /* Userspace is not allowed to create special QPs: */ + if (pd->uobject) { + dev_err(dev, "not support usr space GSI\n"); + return ERR_PTR(-EINVAL); + } + + hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL); + if (!hr_sqp) + return ERR_PTR(-ENOMEM); + + hr_qp = &hr_sqp->hr_qp; + + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, + hr_dev->caps.sqp_start + + hr_dev->caps.num_ports + + init_attr->port_num - 1, hr_qp); + if (ret) { + dev_err(dev, "Create GSI QP failed!\n"); + kfree(hr_sqp); + return ERR_PTR(ret); + } + + hr_qp->port = (init_attr->port_num - 1); + hr_qp->ibqp.qp_num = hr_dev->caps.sqp_start + + hr_dev->caps.num_ports + + init_attr->port_num - 1; + break; + } + default:{ + dev_err(dev, "not support QP type %d\n", init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + } + + return &hr_qp->ibqp; +} + +int to_hr_qp_type(int qp_type) +{ + int transport_type; + + if (qp_type == IB_QPT_RC) + transport_type = SERV_TYPE_RC; + else if (qp_type == IB_QPT_UC) + transport_type = SERV_TYPE_UC; + else if (qp_type == IB_QPT_UD) + transport_type = SERV_TYPE_UD; + else if (qp_type == IB_QPT_GSI) + transport_type = SERV_TYPE_UD; + else + transport_type = -1; + + return transport_type; +} + +int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + enum ib_qp_state cur_state, new_state; + struct device *dev = &hr_dev->pdev->dev; + int ret = -EINVAL; + int p; + + mutex_lock(&hr_qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; + new_state = attr_mask & IB_QP_STATE ? + attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_ETHERNET)) { + dev_err(dev, "ib_modify_qp_is_ok failed\n"); + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { + dev_err(dev, "attr port_num invalid.attr->port_num=%d\n", + attr->port_num); + goto out; + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; + if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { + dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n", + attr->pkey_index); + goto out; + } + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { + dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", + attr->max_rd_atomic); + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { + dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", + attr->max_dest_rd_atomic); + goto out; + } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + ret = -EPERM; + dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, + new_state); + goto out; + } + + ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state, + new_state); + +out: + mutex_unlock(&hr_qp->mutex); + + return ret; +} + +void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, + struct hns_roce_cq *recv_cq) __releases(&send_cq->lock) + __releases(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + +__be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return cpu_to_le32(wr->ex.imm_data); + case IB_WR_SEND_WITH_INV: + return cpu_to_le32(wr->ex.invalidate_rkey); + default: + return 0; + } +} + +static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) +{ + + return hns_roce_buf_offset(&hr_qp->hr_buf, offset); +} + +void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +{ + struct ib_qp *ibqp = &hr_qp->ibqp; + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + + if ((n < 0) || (n > hr_qp->rq.wqe_cnt)) { + dev_err(&hr_dev->pdev->dev, "rq wqe index:%d,rq wqe cnt:%d\r\n", + n, hr_qp->rq.wqe_cnt); + return NULL; + } + + return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); +} + +void *get_send_wqe(struct hns_roce_qp *hr_qp, int n) +{ + struct ib_qp *ibqp = &hr_qp->ibqp; + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + + if ((n < 0) || (n > hr_qp->sq.wqe_cnt)) { + dev_err(&hr_dev->pdev->dev, "sq wqe index:%d,sq wqe cnt:%d\r\n", + n, hr_qp->sq.wqe_cnt); + return NULL; + } + + return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); +} + +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, + struct ib_cq *ib_cq) +{ + struct hns_roce_cq *hr_cq; + u32 cur; + + cur = hr_wq->head - hr_wq->tail; + if (likely(cur + nreq < hr_wq->max_post)) + return 0; + + hr_cq = to_hr_cq(ib_cq); + spin_lock(&hr_cq->lock); + cur = hr_wq->head - hr_wq->tail; + spin_unlock(&hr_cq->lock); + + return cur + nreq >= hr_wq->max_post; +} + +int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + int reserved_from_top = 0; + int ret; + + spin_lock_init(&qp_table->lock); + INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); + + /* A port include two SQP, six port total 12 */ + ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, + hr_dev->caps.num_qps - 1, + hr_dev->caps.sqp_start + SQP_NUM, + reserved_from_top); + if (ret) { + dev_err(&hr_dev->pdev->dev, "qp bitmap init failed!error=%d\n", + ret); + return ret; + } + + return 0; +} + +void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h new file mode 100644 index 00000000000000..a28f761a9f6579 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_user.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_USER_H +#define _HNS_ROCE_USER_H + +struct hns_roce_ib_create_cq { + __u64 buf_addr; +}; + +struct hns_roce_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 sq_no_prefetch; + __u8 reserved[5]; +}; + +struct hns_roce_ib_alloc_ucontext_resp { + __u32 qp_tab_size; +}; + +#endif /*_HNS_ROCE_USER_H */ From 8793f779cf6ecc83d4c3345ac821a2d0c95713ad Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 21 Jul 2016 19:06:56 +0800 Subject: [PATCH 03/75] IB/hns: Kconfig and Makefile for RoCE module This patch added Kconfig and Makefile for building RoCE module. Signed-off-by: Wei Hu Signed-off-by: Nenglong Zhao Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/Kconfig | 1 + drivers/infiniband/hw/Makefile | 1 + drivers/infiniband/hw/hns/Kconfig | 10 ++++++++++ drivers/infiniband/hw/hns/Makefile | 8 ++++++++ 4 files changed, 20 insertions(+) create mode 100644 drivers/infiniband/hw/hns/Kconfig create mode 100644 drivers/infiniband/hw/hns/Makefile diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index e9b7dc037ff877..19a418a1b63125 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -74,6 +74,7 @@ source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" +source "drivers/infiniband/hw/hns/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index c0c7cf8af3f4cc..21fe401ff178b9 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ +obj-$(CONFIG_INFINIBAND_HNS) += hns/ diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig new file mode 100644 index 00000000000000..e1a6e055cd607f --- /dev/null +++ b/drivers/infiniband/hw/hns/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_HNS + tristate "HNS RoCE Driver" + depends on NET_VENDOR_HISILICON + depends on ARM64 && HNS && HNS_DSAF && HNS_ENET + ---help--- + This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine + is used in Hisilicon Hi1610 and more further ICT SoC. + + To compile this driver as a module, choose M here: the module + will be called hns-roce. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile new file mode 100644 index 00000000000000..7e8ebd24dcaead --- /dev/null +++ b/drivers/infiniband/hw/hns/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the Hisilicon RoCE drivers. +# + +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o +hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \ + hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ + hns_roce_cq.o hns_roce_alloc.o hns_roce_hw_v1.o From 66a9bae67b98cf1df384bd86bb5ffa07ade1937a Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 21 Jul 2016 19:06:57 +0800 Subject: [PATCH 04/75] MAINTAINERS: Add maintainers for Hisilicon RoCE driver This patch added maintainers for RoCE driver. Signed-off-by: Wei Hu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 99f95279e45b32..87e23cda8dc1a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5567,6 +5567,14 @@ S: Maintained F: drivers/net/ethernet/hisilicon/ F: Documentation/devicetree/bindings/net/hisilicon*.txt +HISILICON ROCE DRIVER +M: Lijun Ou +M: Wei Hu(Xavier) +L: linux-rdma@vger.kernel.org +S: Maintained +F: drivers/infiniband/hw/hns/ +F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt + HISILICON SAS Controller M: John Garry W: http://www.hisilicon.com From 204f69ba64122b1ee6732bab8cfd71fe2d68c188 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 6 Aug 2016 01:01:24 +0000 Subject: [PATCH 05/75] IB/hns: Fix return value check in hns_roce_get_cfg() In case of error, the function devm_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5b42ec85efa061..6ead671966bd92 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -715,8 +715,8 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); hr_dev->reg_base = devm_ioremap_resource(dev, res); - if (!hr_dev->reg_base) - return -ENOMEM; + if (IS_ERR(hr_dev->reg_base)) + return PTR_ERR(hr_dev->reg_base); for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { net_node = of_parse_phandle(np, "eth-handle", i); From d605916b76593417340397fe281acd2e8a953706 Mon Sep 17 00:00:00 2001 From: Salil Date: Wed, 24 Aug 2016 04:44:49 +0800 Subject: [PATCH 06/75] net: hns: Add support of ACPI to HNS driver RoCE Reset function In the Hip06 SoC, the RoCE Engine is part of the Hisilicon Network Subsystem and is dependent upon DSAF module. Therefore, certain functions like RESET are exposed through the common registers of HNS DSAF module which are memory-mapped by the HNS driver and currently can only be accessed through DT/syscon interface. This patch adds the support of ACPI to the existing RoCE reset function in the HNS driver(please refer NOTE 2). Hisilicon RoCE driver (please refer NOTE 1) shall call this reset function during probe time to reset the RoCE Engine. The HNS Reset function indirectly ends up in calling the _DSM() function part of the DSDT ACPI Table. Actual reset functionality for ACPI is implemented within the ACPI DSDT Table which also has been enhanced to support this change. Support of ACPI in the HNS RoCE driver shall be pushed through a different accompanying below patch: "IB/hns: Add support of ACPI to the Hisilicon RoCE Driver" NOTE 1: HNS RoCE driver has already been accepted by its maintainer Doug Ledford. Please refer below link: https://www.spinics.net/lists/linux-rdma/msg38850.html NOTE 2: RoCE reset function patch has been accepted and now is part of the net-next: https://www.mail-archive.com/netdev@vger.kernel.org/msg123867.html Signed-off-by: Salil Mehta Reviewed-by: Yisen Zhuang Signed-off-by: Doug Ledford --- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 17 +----- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 47 +++++++++++----- .../ethernet/hisilicon/hns/hns_dsaf_main.h | 9 ++-- .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 53 +++++++++++++++++-- .../ethernet/hisilicon/hns/hns_dsaf_misc.h | 3 +- .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 4 +- 6 files changed, 92 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index a834774fdb02d0..a68eef0ee65ffa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -644,21 +644,6 @@ hns_mac_phy_parse_addr(struct device *dev, struct fwnode_handle *fwnode) return addr; } -static int hns_mac_phydev_match(struct device *dev, void *fwnode) -{ - return dev->fwnode == fwnode; -} - -static struct -platform_device *hns_mac_find_platform_device(struct fwnode_handle *fwnode) -{ - struct device *dev; - - dev = bus_find_device(&platform_bus_type, NULL, - fwnode, hns_mac_phydev_match); - return dev ? to_platform_device(dev) : NULL; -} - static int hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb, u32 addr) @@ -724,7 +709,7 @@ static void hns_mac_register_phy(struct hns_mac_cb *mac_cb) return; /* dev address in adev */ - pdev = hns_mac_find_platform_device(acpi_fwnode_handle(args.adev)); + pdev = hns_dsaf_find_platform_device(acpi_fwnode_handle(args.adev)); mii_bus = platform_get_drvdata(pdev); rc = hns_mac_register_phydev(mii_bus, mac_cb, addr); if (!rc) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 05bd19f9ebc51c..9283bc60bb2405 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2788,7 +2788,7 @@ module_platform_driver(g_dsaf_driver); * @enable: false - request reset , true - drop reset * retuen 0 - success , negative -fail */ -int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable) +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) { struct dsaf_device *dsaf_dev; struct platform_device *pdev; @@ -2817,24 +2817,44 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable) {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, }; - if (!is_of_node(dsaf_fwnode)) { - pr_err("hisi_dsaf: Only support DT node!\n"); + /* find the platform device corresponding to fwnode */ + if (is_of_node(dsaf_fwnode)) { + pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); + } else if (is_acpi_device_node(dsaf_fwnode)) { + pdev = hns_dsaf_find_platform_device(dsaf_fwnode); + } else { + pr_err("fwnode is neither OF or ACPI type\n"); return -EINVAL; } - pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); + + /* check if we were a success in fetching pdev */ + if (!pdev) { + pr_err("couldn't find platform device for node\n"); + return -ENODEV; + } + + /* retrieve the dsaf_device from the driver data */ dsaf_dev = dev_get_drvdata(&pdev->dev); + if (!dsaf_dev) { + dev_err(&pdev->dev, "dsaf_dev is NULL\n"); + return -ENODEV; + } + + /* now, make sure we are running on compatible SoC */ if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", dsaf_dev->ae_dev.name); return -ENODEV; } - if (!enable) { - /* Reset rocee-channels in dsaf and rocee */ - hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false); - hns_dsaf_roce_srst(dsaf_dev, false); + /* do reset or de-reset according to the flag */ + if (!dereset) { + /* reset rocee-channels in dsaf and rocee */ + dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, + false); + dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, false); } else { - /* Configure dsaf tx roce correspond to port map and sl map */ + /* configure dsaf tx roce correspond to port map and sl map */ mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG); for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) dsaf_set_field(mp, 7 << i * 3, i * 3, @@ -2848,12 +2868,13 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable) sl_map[i][DSAF_ROCE_6PORT_MODE]); dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl); - /* De-reset rocee-channels in dsaf and rocee */ - hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true); + /* de-reset rocee-channels in dsaf and rocee */ + dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, + true); msleep(SRST_TIME_INTERVAL); - hns_dsaf_roce_srst(dsaf_dev, true); + dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, true); - /* Eanble dsaf channel rocee credit */ + /* enable dsaf channel rocee credit */ credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG); dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0); dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index f3681d566ae61a..35df187e66f190 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -305,7 +305,7 @@ struct dsaf_misc_op { void (*cpld_reset_led)(struct hns_mac_cb *mac_cb); int (*cpld_set_led_id)(struct hns_mac_cb *mac_cb, enum hnae_led_state status); - /* reset seris function, it will be reset if the dereseet is 0 */ + /* reset series function, it will be reset if the dereset is 0 */ void (*dsaf_reset)(struct dsaf_device *dsaf_dev, bool dereset); void (*xge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*xge_core_srst)(struct dsaf_device *dsaf_dev, u32 port, @@ -313,6 +313,9 @@ struct dsaf_misc_op { void (*ge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_comm_srst)(struct dsaf_device *dsaf_dev, bool dereset); + void (*hns_dsaf_srst_chns)(struct dsaf_device *dsaf_dev, u32 msk, + bool dereset); + void (*hns_dsaf_roce_srst)(struct dsaf_device *dsaf_dev, bool dereset); phy_interface_t (*get_phy_if)(struct hns_mac_cb *mac_cb); int (*get_sfp_prsnt)(struct hns_mac_cb *mac_cb, int *sfp_prsnt); @@ -445,10 +448,6 @@ int hns_dsaf_get_mac_entry_by_index( void hns_dsaf_fix_mac_mode(struct hns_mac_cb *mac_cb); -void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable); - -void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable); - int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev); void hns_dsaf_ae_uninit(struct dsaf_device *dsaf_dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 36b9f791cf2fc1..67accce1d33d09 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -26,6 +26,8 @@ enum _dsm_rst_type { HNS_XGE_CORE_RESET_FUNC = 0x3, HNS_XGE_RESET_FUNC = 0x4, HNS_GE_RESET_FUNC = 0x5, + HNS_DSAF_CHN_RESET_FUNC = 0x6, + HNS_ROCE_RESET_FUNC = 0x7, }; const u8 hns_dsaf_acpi_dsm_uuid[] = { @@ -241,11 +243,11 @@ static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, * bit18-19 for com/dfx * @enable: false - request reset , true - drop reset */ -void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable) +void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) { u32 reg_addr; - if (!enable) + if (!dereset) reg_addr = DSAF_SUB_SC_DSAF_RESET_REQ_REG; else reg_addr = DSAF_SUB_SC_DSAF_RESET_DREQ_REG; @@ -253,9 +255,27 @@ void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable) dsaf_write_sub(dsaf_dev, reg_addr, msk); } -void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable) +/** + * hns_dsaf_srst_chns - reset dsaf channels + * @dsaf_dev: dsaf device struct pointer + * @msk: xbar channels mask value: + * bit0-5 for xge0-5 + * bit6-11 for ppe0-5 + * bit12-17 for roce0-5 + * bit18-19 for com/dfx + * @enable: false - request reset , true - drop reset + */ +void +hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) { - if (!enable) { + hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC, + HNS_DSAF_CHN_RESET_FUNC, + msk, dereset); +} + +void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool dereset) +{ + if (!dereset) { dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1); } else { dsaf_write_sub(dsaf_dev, @@ -267,6 +287,12 @@ void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable) } } +void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset) +{ + hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC, + HNS_ROCE_RESET_FUNC, 0, dereset); +} + static void hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev, u32 port, bool dereset) @@ -575,6 +601,8 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->ge_srst = hns_dsaf_ge_srst_by_port; misc_op->ppe_srst = hns_ppe_srst_by_port; misc_op->ppe_comm_srst = hns_ppe_com_srst; + misc_op->hns_dsaf_srst_chns = hns_dsaf_srst_chns; + misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst; misc_op->get_phy_if = hns_mac_get_phy_if; misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt; @@ -591,6 +619,8 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->ge_srst = hns_dsaf_ge_srst_by_port_acpi; misc_op->ppe_srst = hns_ppe_srst_by_port_acpi; misc_op->ppe_comm_srst = hns_ppe_com_srst; + misc_op->hns_dsaf_srst_chns = hns_dsaf_srst_chns_acpi; + misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi; misc_op->get_phy_if = hns_mac_get_phy_if_acpi; misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt; @@ -603,3 +633,18 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) return (void *)misc_op; } + +static int hns_dsaf_dev_match(struct device *dev, void *fwnode) +{ + return dev->fwnode == fwnode; +} + +struct +platform_device *hns_dsaf_find_platform_device(struct fwnode_handle *fwnode) +{ + struct device *dev; + + dev = bus_find_device(&platform_bus_type, NULL, + fwnode, hns_dsaf_dev_match); + return dev ? to_platform_device(dev) : NULL; +} diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h index f06bb03d47a6e1..310e80261366ae 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h @@ -34,5 +34,6 @@ #define DSAF_LED_ANCHOR_B 5 struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev); - +struct +platform_device *hns_dsaf_find_platform_device(struct fwnode_handle *fwnode); #endif diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 13c16ab7be4808..4b8b803822d1b2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -78,10 +78,10 @@ #define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C #define DSAF_SUB_SC_DSAF_RESET_REQ_REG 0xAA8 -#define DSAF_SUB_SC_ROCEE_RESET_REQ_REG 0xA50 #define DSAF_SUB_SC_DSAF_RESET_DREQ_REG 0xAAC -#define DSAF_SUB_SC_ROCEE_CLK_DIS_REG 0x32C +#define DSAF_SUB_SC_ROCEE_RESET_REQ_REG 0xA50 #define DSAF_SUB_SC_ROCEE_RESET_DREQ_REG 0xA54 +#define DSAF_SUB_SC_ROCEE_CLK_DIS_REG 0x32C #define DSAF_SUB_SC_ROCEE_CLK_EN_REG 0x328 #define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 #define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 From 528f1deb16e5b82e9fe161ebd8caa5983766f0f0 Mon Sep 17 00:00:00 2001 From: Salil Date: Wed, 24 Aug 2016 04:44:50 +0800 Subject: [PATCH 07/75] IB/hns: Add support of ACPI to the Hisilicon RoCE driver This patch is meant to add support of ACPI to the Hisilicon RoCE driver. Changes done are primarily meant to detect the type and then either use DT specific or ACPI spcific functions. Where ever possible, this patch tries to make use of Unified Device Property Interface APIs to support both DT and ACPI through single interface. This patch depends upon HNS ethernet driver to Reset RoCE. This function within HNS ethernet driver has also been enhanced to support ACPI and is part of other accompanying patch with this patch-set. NOTE: The changes in this patch are done over below branch, https://github.com/dledford/linux/tree/hns-roce Signed-off-by: Salil Mehta Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_eq.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 37 ++++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 2 +- drivers/infiniband/hw/hns/hns_roce_main.c | 127 ++++++++++++++++---- 5 files changed, 136 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 00f01be1ffa548..ea735800eb18e8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -531,7 +531,7 @@ struct hns_roce_dev { struct ib_device ib_dev; struct platform_device *pdev; struct hns_roce_uar priv_uar; - const char *irq_names; + const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; spinlock_t sm_lock; spinlock_t cq_db_lock; spinlock_t bt_cmd_lock; diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 5febbb4b68e798..98af7fecf2f1e9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -713,7 +713,7 @@ int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev) for (j = 0; j < eq_num; j++) { ret = request_irq(eq_table->eq[j].irq, hns_roce_msi_x_interrupt, - 0, hr_dev->irq_names, eq_table->eq + j); + 0, hr_dev->irq_names[j], eq_table->eq + j); if (ret) { dev_err(dev, "request irq error!\n"); goto err_request_irq_fail; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b52f3badf855c5..399f5dedaf2dd6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -31,6 +31,7 @@ */ #include +#include #include #include "hns_roce_common.h" #include "hns_roce_device.h" @@ -794,29 +795,47 @@ static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) * @enable: true -- drop reset, false -- reset * return 0 - success , negative --fail */ -int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool enable) +int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) { struct device_node *dsaf_node; struct device *dev = &hr_dev->pdev->dev; struct device_node *np = dev->of_node; + struct fwnode_handle *fwnode; int ret; - dsaf_node = of_parse_phandle(np, "dsaf-handle", 0); - if (!dsaf_node) { - dev_err(dev, "Unable to get dsaf node by dsaf-handle!\n"); - return -EINVAL; + /* check if this is DT/ACPI case */ + if (dev_of_node(dev)) { + dsaf_node = of_parse_phandle(np, "dsaf-handle", 0); + if (!dsaf_node) { + dev_err(dev, "could not find dsaf-handle\n"); + return -EINVAL; + } + fwnode = &dsaf_node->fwnode; + } else if (is_acpi_device_node(dev->fwnode)) { + struct acpi_reference_args args; + + ret = acpi_node_get_property_reference(dev->fwnode, + "dsaf-handle", 0, &args); + if (ret) { + dev_err(dev, "could not find dsaf-handle\n"); + return ret; + } + fwnode = acpi_fwnode_handle(args.adev); + } else { + dev_err(dev, "cannot read data from DT or ACPI\n"); + return -ENXIO; } - ret = hns_dsaf_roce_reset(&dsaf_node->fwnode, false); + ret = hns_dsaf_roce_reset(fwnode, false); if (ret) return ret; - if (enable) { + if (dereset) { msleep(SLEEP_TIME_INTERVAL); - return hns_dsaf_roce_reset(&dsaf_node->fwnode, true); + ret = hns_dsaf_roce_reset(fwnode, true); } - return 0; + return ret; } void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 2b3bf212570b00..316b592b1636df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -976,6 +976,6 @@ struct hns_roce_v1_priv { struct hns_roce_raq_table raq_table; }; -int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable); +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 6ead671966bd92..f64f0dde9a882c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -30,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - +#include #include #include #include @@ -694,40 +694,122 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) return ret; } +static const struct of_device_id hns_roce_of_match[] = { + { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, }, + {}, +}; +MODULE_DEVICE_TABLE(of, hns_roce_of_match); + +static const struct acpi_device_id hns_roce_acpi_match[] = { + { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); + +static int hns_roce_node_match(struct device *dev, void *fwnode) +{ + return dev->fwnode == fwnode; +} + +static struct +platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode) +{ + struct device *dev; + + /* get the 'device'corresponding to matching 'fwnode' */ + dev = bus_find_device(&platform_bus_type, NULL, + fwnode, hns_roce_node_match); + /* get the platform device */ + return dev ? to_platform_device(dev) : NULL; +} + static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) { int i; + int ret; u8 phy_port; int port_cnt = 0; struct device *dev = &hr_dev->pdev->dev; - struct device_node *np = dev->of_node; struct device_node *net_node; struct net_device *netdev = NULL; struct platform_device *pdev = NULL; struct resource *res; - if (of_device_is_compatible(np, "hisilicon,hns-roce-v1")) { - hr_dev->hw = &hns_roce_hw_v1; + /* check if we are compatible with the underlying SoC */ + if (dev_of_node(dev)) { + const struct of_device_id *of_id; + + of_id = of_match_node(hns_roce_of_match, dev->of_node); + if (!of_id) { + dev_err(dev, "device is not compatible!\n"); + return -ENXIO; + } + hr_dev->hw = (struct hns_roce_hw *)of_id->data; + if (!hr_dev->hw) { + dev_err(dev, "couldn't get H/W specific DT data!\n"); + return -ENXIO; + } + } else if (is_acpi_device_node(dev->fwnode)) { + const struct acpi_device_id *acpi_id; + + acpi_id = acpi_match_device(hns_roce_acpi_match, dev); + if (!acpi_id) { + dev_err(dev, "device is not compatible!\n"); + return -ENXIO; + } + hr_dev->hw = (struct hns_roce_hw *) acpi_id->driver_data; + if (!hr_dev->hw) { + dev_err(dev, "couldn't get H/W specific ACPI data!\n"); + return -ENXIO; + } } else { - dev_err(dev, "device no compatible!\n"); - return -EINVAL; + dev_err(dev, "can't read compatibility data from DT or ACPI\n"); + return -ENXIO; } + /* get the mapped register base address */ res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "memory resource not found!\n"); + return -EINVAL; + } hr_dev->reg_base = devm_ioremap_resource(dev, res); if (IS_ERR(hr_dev->reg_base)) return PTR_ERR(hr_dev->reg_base); + /* get the RoCE associated ethernet ports or netdevices */ for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { - net_node = of_parse_phandle(np, "eth-handle", i); - if (net_node) { + if (dev_of_node(dev)) { + net_node = of_parse_phandle(dev->of_node, "eth-handle", + i); + if (!net_node) + continue; pdev = of_find_device_by_node(net_node); + } else if (is_acpi_device_node(dev->fwnode)) { + struct acpi_reference_args args; + struct fwnode_handle *fwnode; + + ret = acpi_node_get_property_reference(dev->fwnode, + "eth-handle", + i, &args); + if (ret) + continue; + fwnode = acpi_fwnode_handle(args.adev); + pdev = hns_roce_find_pdev(fwnode); + } else { + dev_err(dev, "cannot read data from DT or ACPI\n"); + return -ENXIO; + } + + if (pdev) { netdev = platform_get_drvdata(pdev); phy_port = (u8)i; if (netdev) { hr_dev->iboe.netdevs[port_cnt] = netdev; hr_dev->iboe.phy_port[port_cnt] = phy_port; } else { + dev_err(dev, "no netdev found with pdev %s\n", + pdev->name); return -ENODEV; } port_cnt++; @@ -735,26 +817,32 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) } if (port_cnt == 0) { - dev_err(dev, "Unable to get available port by eth-handle!\n"); + dev_err(dev, "unable to get eth-handle for available ports!\n"); return -EINVAL; } hr_dev->caps.num_ports = port_cnt; - /* Cmd issue mode: 0 is poll, 1 is event */ + /* cmd issue mode: 0 is poll, 1 is event */ hr_dev->cmd_mod = 1; hr_dev->loop_idc = 0; + /* read the interrupt names from the DT or ACPI */ + ret = device_property_read_string_array(dev, "interrupt-names", + hr_dev->irq_names, + HNS_ROCE_MAX_IRQ_NUM); + if (ret < 0) { + dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n"); + return ret; + } + + /* fetch the interrupt numbers */ for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) { hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); if (hr_dev->irq[i] <= 0) { - dev_err(dev, "Get No.%d irq resource failed!\n", i); + dev_err(dev, "platform get of irq[=%d] failed!\n", i); return -EINVAL; } - - if (of_property_read_string_index(np, "interrupt-names", i, - &hr_dev->irq_names)) - return -EINVAL; } return 0; @@ -917,7 +1005,7 @@ static int hns_roce_probe(struct platform_device *pdev) if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) { - dev_err(dev, "No usable DMA addressing mode\n"); + dev_err(dev, "Not usable DMA addressing mode\n"); ret = -EIO; goto error_failed_get_cfg; } @@ -1035,18 +1123,13 @@ static int hns_roce_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id hns_roce_of_match[] = { - { .compatible = "hisilicon,hns-roce-v1",}, - {}, -}; -MODULE_DEVICE_TABLE(of, hns_roce_of_match); - static struct platform_driver hns_roce_driver = { .probe = hns_roce_probe, .remove = hns_roce_remove, .driver = { .name = DRV_NAME, .of_match_table = hns_roce_of_match, + .acpi_match_table = ACPI_PTR(hns_roce_acpi_match), }, }; From 50d46335b03baa9767e79a6f4757df7bd31eba21 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:16 +0200 Subject: [PATCH 08/75] IB/core: rename pd->local_mr to pd->__internal_mr This has two reasons: a) to clearly mark that drivers don't have any business using it, and b) because we're going to use it for the (dangerous) global rkey soon, so that drivers don't create on themselves. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 12 ++++++------ include/rdma/ib_verbs.h | 6 +++++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f6647318138d77..fe784a90881730 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -571,7 +571,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->device = ib_dev; pd->uobject = uobj; - pd->local_mr = NULL; + pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); uobj->object = pd; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f2b776efab3a3e..9159ea5ad82181 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -237,7 +237,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) pd->device = device; pd->uobject = NULL; - pd->local_mr = NULL; + pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) @@ -251,8 +251,8 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) return (struct ib_pd *)mr; } - pd->local_mr = mr; - pd->local_dma_lkey = pd->local_mr->lkey; + pd->__internal_mr = mr; + pd->local_dma_lkey = pd->__internal_mr->lkey; } return pd; } @@ -270,10 +270,10 @@ void ib_dealloc_pd(struct ib_pd *pd) { int ret; - if (pd->local_mr) { - ret = ib_dereg_mr(pd->local_mr); + if (pd->__internal_mr) { + ret = ib_dereg_mr(pd->__internal_mr); WARN_ON(ret); - pd->local_mr = NULL; + pd->__internal_mr = NULL; } /* uverbs manipulates usecnt with proper locking, while the kabi diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e90dd28bb7536..38a08dae49c4c4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1373,7 +1373,11 @@ struct ib_pd { struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ - struct ib_mr *local_mr; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct ib_mr *__internal_mr; }; struct ib_xrcd { From ed082d36a7b2c27d1cda55fdfb28af18040c4a89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:17 +0200 Subject: [PATCH 09/75] IB/core: add support to create a unsafe global rkey to ib_create_pd Instead of exposing ib_get_dma_mr to ULPs and letting them use it more or less unchecked, this moves the capability of creating a global rkey into the RDMA core, where it can be easily audited. It also prints a warning everytime this feature is used as well. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 2 +- drivers/infiniband/core/verbs.c | 27 +++++++++++++++---- drivers/infiniband/hw/mlx4/mad.c | 2 +- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/rdma.c | 2 +- .../lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2 +- include/rdma/ib_verbs.h | 20 +++++++++++++- net/9p/trans_rdma.c | 2 +- net/rds/ib.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 2 +- 18 files changed, 57 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2d49228f28b2b1..95d33a3572e6d2 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3160,7 +3160,7 @@ static int ib_mad_port_open(struct ib_device *device, goto error3; } - port_priv->pd = ib_alloc_pd(device); + port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9159ea5ad82181..e87b5187729ce0 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -227,9 +227,11 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); * Every PD has a local_dma_lkey which can be used as the lkey value for local * memory operations. */ -struct ib_pd *ib_alloc_pd(struct ib_device *device) +struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, + const char *caller) { struct ib_pd *pd; + int mr_access_flags = 0; pd = device->alloc_pd(device, NULL, NULL); if (IS_ERR(pd)) @@ -239,24 +241,39 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) pd->uobject = NULL; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); + pd->flags = flags; if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; - else { + else + mr_access_flags |= IB_ACCESS_LOCAL_WRITE; + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + pr_warn("%s: enabling unsafe global rkey\n", caller); + mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; + } + + if (mr_access_flags) { struct ib_mr *mr; - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + mr = ib_get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); return (struct ib_pd *)mr; } pd->__internal_mr = mr; - pd->local_dma_lkey = pd->__internal_mr->lkey; + + if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + pd->local_dma_lkey = pd->__internal_mr->lkey; + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) + pd->unsafe_global_rkey = pd->__internal_mr->rkey; } + return pd; } -EXPORT_SYMBOL(ib_alloc_pd); +EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd - Deallocates a protection domain. diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d28f9857..517346f8ed7581 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1897,7 +1897,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_buf; } - ctx->pd = ib_alloc_pd(ctx->ib_dev); + ctx->pd = ib_alloc_pd(ctx->ib_dev, 0); if (IS_ERR(ctx->pd)) { ret = PTR_ERR(ctx->pd); pr_err("Couldn't create tunnel PD (%d)\n", ret); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2de2624a..a96e78c2c75ac3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1259,7 +1259,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, if (err) goto err1; - xrcd->pd = ib_alloc_pd(ibdev); + xrcd->pd = ib_alloc_pd(ibdev, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f02a975320bd5b..cf1eee492a4e24 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2223,7 +2223,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - pd = ib_alloc_pd(&dev->ib_dev); + pd = ib_alloc_pd(&dev->ib_dev, 0); if (IS_ERR(pd)) { mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); ret = PTR_ERR(pd); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index c55ecb2c3736ce..6067f075772a40 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size; int i; - priv->pd = ib_alloc_pd(priv->ca); + priv->pd = ib_alloc_pd(priv->ca, 0); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 1b4945367e4f6d..e9de99219d74b5 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -88,7 +88,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device->comps_used, ib_dev->name, ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) goto pd_err; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ba6be060a476b1..8df608ede3668e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -309,7 +309,7 @@ isert_create_device_ib_res(struct isert_device *device) if (ret) goto out; - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) { ret = PTR_ERR(device->pd); isert_err("failed to allocate pd, device %p, ret=%d\n", diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3322ed750172ea..579b8aedfcdd53 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3573,7 +3573,7 @@ static void srp_add_one(struct ib_device *device) INIT_LIST_HEAD(&srp_dev->dev_list); srp_dev->dev = device; - srp_dev->pd = ib_alloc_pd(device); + srp_dev->pd = ib_alloc_pd(device, 0); if (IS_ERR(srp_dev->pd)) goto free_dev; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dfa23b075a8846..48a44af740a6c5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2475,7 +2475,7 @@ static void srpt_add_one(struct ib_device *device) init_waitqueue_head(&sdev->ch_releaseQ); mutex_init(&sdev->mutex); - sdev->pd = ib_alloc_pd(device); + sdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(sdev->pd)) goto free_dev; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8d2875b4c56d8c..a4961edf0ca154 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -446,7 +446,7 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->dev = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->dev); + ndev->pd = ib_alloc_pd(ndev->dev, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index b4d648536c3e43..187763a7735556 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -848,7 +848,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->device = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->device); + ndev->pd = ib_alloc_pd(ndev->device, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 4f5978b3767bfb..0e4c6090bf625d 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -2465,7 +2465,7 @@ int kiblnd_dev_failover(struct kib_dev *dev) hdev->ibh_cmid = cmid; hdev->ibh_ibdev = cmid->device; - pd = ib_alloc_pd(cmid->device); + pd = ib_alloc_pd(cmid->device, 0); if (IS_ERR(pd)) { rc = PTR_ERR(pd); CERROR("Can't allocate PD: %d\n", rc); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 38a08dae49c4c4..4bdd898697cfd3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1370,10 +1370,13 @@ struct ib_udata { struct ib_pd { u32 local_dma_lkey; + u32 flags; struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ + u32 unsafe_global_rkey; + /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -2506,8 +2509,23 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); -struct ib_pd *ib_alloc_pd(struct ib_device *device); +enum ib_pd_flags { + /* + * Create a memory registration for all memory in the system and place + * the rkey for it into pd->unsafe_global_rkey. This can be used by + * ULPs to avoid the overhead of dynamic MRs. + * + * This flag is generally considered unsafe and must only be used in + * extremly trusted environments. Every use of it will log a warning + * in the kernel log. + */ + IB_PD_UNSAFE_GLOBAL_RKEY = 0x01, +}; +struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, + const char *caller); +#define ib_alloc_pd(device, flags) \ + __ib_alloc_pd((device), (flags), __func__) void ib_dealloc_pd(struct ib_pd *pd); /** diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 1852e383afd626..553ed4ecb6a0ec 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -680,7 +680,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Protection Domain */ - rdma->pd = ib_alloc_pd(rdma->cm_id->device); + rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0); if (IS_ERR(rdma->pd)) goto error; diff --git a/net/rds/ib.c b/net/rds/ib.c index 7eaf887e46f8ef..5680d90b0b779e 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -160,7 +160,7 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; rds_ibdev->dev = device; - rds_ibdev->pd = ib_alloc_pd(device); + rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { rds_ibdev->pd = NULL; goto put_dev; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index dd9440137834c7..eb2857f52b0513 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -993,7 +993,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord); newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); - newxprt->sc_pd = ib_alloc_pd(dev); + newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { dprintk("svcrdma: error creating PD for connect request\n"); goto errout; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 536d0be3f61bdd..6561d4a35acbe5 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -386,7 +386,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_device); + ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); From 8e61212d05963a3beecb8bf124b88a0b13a9600d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:18 +0200 Subject: [PATCH 10/75] IB/iser: use IB_PD_UNSAFE_GLOBAL_RKEY Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.h | 1 - drivers/infiniband/ulp/iser/iser_memory.c | 6 +++++- drivers/infiniband/ulp/iser/iser_verbs.c | 22 +++------------------- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0351059783b129..0be6a7c5ddb5ae 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -374,7 +374,6 @@ struct iser_reg_ops { struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; - struct ib_mr *mr; struct ib_event_handler event_handler; struct list_head ig_list; int refcount; diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 90be568934146a..9c3e9ab53a4157 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -199,7 +199,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, * FIXME: rework the registration code path to differentiate * rkey/lkey use cases */ - reg->rkey = device->mr ? device->mr->rkey : 0; + + if (device->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) + reg->rkey = device->pd->unsafe_global_rkey; + else + reg->rkey = 0; reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index e9de99219d74b5..a4b791dfaa1db4 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -88,7 +88,8 @@ static int iser_create_device_ib_res(struct iser_device *device) device->comps_used, ib_dev->name, ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(ib_dev, 0); + device->pd = ib_alloc_pd(ib_dev, + iser_always_reg ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(device->pd)) goto pd_err; @@ -103,26 +104,13 @@ static int iser_create_device_ib_res(struct iser_device *device) } } - if (!iser_always_reg) { - int access = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ; - - device->mr = ib_get_dma_mr(device->pd, access); - if (IS_ERR(device->mr)) - goto cq_err; - } - INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, iser_event_handler); if (ib_register_event_handler(&device->event_handler)) - goto handler_err; + goto cq_err; return 0; -handler_err: - if (device->mr) - ib_dereg_mr(device->mr); cq_err: for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; @@ -154,14 +142,10 @@ static void iser_free_device_ib_res(struct iser_device *device) } (void)ib_unregister_event_handler(&device->event_handler); - if (device->mr) - (void)ib_dereg_mr(device->mr); ib_dealloc_pd(device->pd); kfree(device->comps); device->comps = NULL; - - device->mr = NULL; device->pd = NULL; } From 5f071777f9cbd71faa00eb854d15f42ae74e0471 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:19 +0200 Subject: [PATCH 11/75] IB/srp: use IB_PD_UNSAFE_GLOBAL_RKEY Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 44 +++++++++++++---------------- drivers/infiniband/ulp/srp/ib_srp.h | 3 +- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 579b8aedfcdd53..5ed1e8841a7b0f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1262,6 +1262,7 @@ static int srp_map_finish_fmr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + struct ib_pd *pd = target->pd; struct ib_pool_fmr *fmr; u64 io_addr = 0; @@ -1273,9 +1274,9 @@ static int srp_map_finish_fmr(struct srp_map_state *state, if (state->npages == 0) return 0; - if (state->npages == 1 && target->global_mr) { + if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { srp_map_desc(state, state->base_dma_addr, state->dma_len, - target->global_mr->rkey); + pd->unsafe_global_rkey); goto reset_state; } @@ -1315,6 +1316,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + struct ib_pd *pd = target->pd; struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; @@ -1326,12 +1328,12 @@ static int srp_map_finish_fr(struct srp_map_state *state, WARN_ON_ONCE(!dev->use_fast_reg); - if (sg_nents == 1 && target->global_mr) { + if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, sg_dma_len(state->sg) - sg_offset, - target->global_mr->rkey); + pd->unsafe_global_rkey); if (sg_offset_p) *sg_offset_p = 0; return 1; @@ -1491,7 +1493,7 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), - target->global_mr->rkey); + target->pd->unsafe_global_rkey); } return 0; @@ -1591,6 +1593,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; + struct ib_pd *pd = target->pd; struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; int len, nents, count, ret; @@ -1626,7 +1629,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - if (count == 1 && target->global_mr) { + if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1636,7 +1639,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_direct_buf *buf = (void *) cmd->add_data; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); - buf->key = cpu_to_be32(target->global_mr->rkey); + buf->key = cpu_to_be32(pd->unsafe_global_rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); req->nmdesc = 0; @@ -1709,14 +1712,14 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, memcpy(indirect_hdr->desc_list, req->indirect_desc, count * sizeof (struct srp_direct_buf)); - if (!target->global_mr) { + if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, idb_len, &idb_rkey); if (ret < 0) goto unmap; req->nmdesc++; } else { - idb_rkey = cpu_to_be32(target->global_mr->rkey); + idb_rkey = cpu_to_be32(pd->unsafe_global_rkey); } indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); @@ -3268,8 +3271,8 @@ static ssize_t srp_create_target(struct device *dev, target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; + target->pd = host->srp_dev->pd; target->lkey = host->srp_dev->pd->local_dma_lkey; - target->global_mr = host->srp_dev->global_mr; target->cmd_sg_cnt = cmd_sg_entries; target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; target->allow_ext_sg = allow_ext_sg; @@ -3524,6 +3527,7 @@ static void srp_add_one(struct ib_device *device) struct srp_host *host; int mr_page_shift, p; u64 max_pages_per_mr; + unsigned int flags = 0; srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) @@ -3558,6 +3562,10 @@ static void srp_add_one(struct ib_device *device) srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; } + if (never_register || !register_always || + (!srp_dev->has_fmr && !srp_dev->has_fr)) + flags |= IB_PD_UNSAFE_GLOBAL_RKEY; + if (srp_dev->use_fast_reg) { srp_dev->max_pages_per_mr = min_t(u32, srp_dev->max_pages_per_mr, @@ -3573,19 +3581,10 @@ static void srp_add_one(struct ib_device *device) INIT_LIST_HEAD(&srp_dev->dev_list); srp_dev->dev = device; - srp_dev->pd = ib_alloc_pd(device, 0); + srp_dev->pd = ib_alloc_pd(device, flags); if (IS_ERR(srp_dev->pd)) goto free_dev; - if (never_register || !register_always || - (!srp_dev->has_fmr && !srp_dev->has_fr)) { - srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(srp_dev->global_mr)) - goto err_pd; - } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); @@ -3596,9 +3595,6 @@ static void srp_add_one(struct ib_device *device) ib_set_client_data(device, &srp_client, srp_dev); return; -err_pd: - ib_dealloc_pd(srp_dev->pd); - free_dev: kfree(srp_dev); } @@ -3638,8 +3634,6 @@ static void srp_remove_one(struct ib_device *device, void *client_data) kfree(host); } - if (srp_dev->global_mr) - ib_dereg_mr(srp_dev->global_mr); ib_dealloc_pd(srp_dev->pd); kfree(srp_dev); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 26bb9b0a7a639d..21c69695f9d4f4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -90,7 +90,6 @@ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *global_mr; u64 mr_page_mask; int mr_page_size; int mr_max_size; @@ -179,7 +178,7 @@ struct srp_target_port { spinlock_t lock; /* read only in the hot path */ - struct ib_mr *global_mr; + struct ib_pd *pd; struct srp_rdma_ch *ch; u32 ch_count; u32 lkey; From 11975e0149002b88aa8003bfa6172f49e3351768 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:20 +0200 Subject: [PATCH 12/75] nvme-rdma: use IB_PD_UNSAFE_GLOBAL_RKEY Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/nvme/host/rdma.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a4961edf0ca154..1a185479679709 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -58,7 +58,6 @@ struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; struct kref ref; struct list_head entry; }; @@ -410,10 +409,7 @@ static void nvme_rdma_free_dev(struct kref *ref) list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - if (!register_always) - ib_dereg_mr(ndev->mr); ib_dealloc_pd(ndev->pd); - kfree(ndev); } @@ -446,24 +442,16 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->dev = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->dev, 0); + ndev->pd = ib_alloc_pd(ndev->dev, + register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(ndev->pd)) goto out_free_dev; - if (!register_always) { - ndev->mr = ib_get_dma_mr(ndev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(ndev->mr)) - goto out_free_pd; - } - if (!(ndev->dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { dev_err(&ndev->dev->dev, "Memory registrations not supported.\n"); - goto out_free_mr; + goto out_free_pd; } list_add(&ndev->entry, &device_list); @@ -471,9 +459,6 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) mutex_unlock(&device_list_mutex); return ndev; -out_free_mr: - if (!register_always) - ib_dereg_mr(ndev->mr); out_free_pd: ib_dealloc_pd(ndev->pd); out_free_dev: @@ -903,7 +888,7 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); - put_unaligned_le32(queue->device->mr->rkey, sg->key); + put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; } @@ -988,7 +973,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, nvme_rdma_queue_idx(queue)) return nvme_rdma_map_sg_inline(queue, req, c); - if (!register_always) + if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) return nvme_rdma_map_sg_single(queue, req, c); } From 5ef990f06bd7e3cf521b5705d898d8e43d04ea90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:21 +0200 Subject: [PATCH 13/75] IB/core: remove ib_get_dma_mr We now only use it from ib_alloc_pd to create a local DMA lkey if the device doesn't provide one, or a global rkey if the ULP requests it. This patch removes ib_get_dma_mr and open codes the functionality in ib_alloc_pd so that we can simplify the code and prevent abuse of the functionality. As a side effect we can also simplify things by removing the valid access bit check, and the PD refcounting. In the future I hope to also remove the per-PD global MR entirely by shifting this work into the HW drivers, as one step towards avoiding the struct ib_mr overload for various different use cases. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 34 ++++++++------------------------- include/rdma/ib_verbs.h | 12 ------------ 2 files changed, 8 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e87b5187729ce0..d4ef3b1a695a3f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -256,12 +256,17 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, if (mr_access_flags) { struct ib_mr *mr; - mr = ib_get_dma_mr(pd, mr_access_flags); + mr = pd->device->get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); - return (struct ib_pd *)mr; + return ERR_CAST(mr); } + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + mr->need_inval = false; + pd->__internal_mr = mr; if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) @@ -288,7 +293,7 @@ void ib_dealloc_pd(struct ib_pd *pd) int ret; if (pd->__internal_mr) { - ret = ib_dereg_mr(pd->__internal_mr); + ret = pd->device->dereg_mr(pd->__internal_mr); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -1408,29 +1413,6 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *mr; - int err; - - err = ib_check_mr_access(mr_access_flags); - if (err) - return ERR_PTR(err); - - mr = pd->device->get_dma_mr(pd, mr_access_flags); - - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - mr->need_inval = false; - } - - return mr; -} -EXPORT_SYMBOL(ib_get_dma_mr); - int ib_dereg_mr(struct ib_mr *mr) { struct ib_pd *pd = mr->pd; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4bdd898697cfd3..0a6c70895c8b95 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2878,18 +2878,6 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } -/** - * ib_get_dma_mr - Returns a memory region for system memory that is - * usable for DMA. - * @pd: The protection domain associated with the memory region. - * @mr_access_flags: Specifies the memory access rights. - * - * Note that the ib_dma_*() functions defined below must be used - * to create/destroy addresses used with the Lkey or Rkey returned - * by ib_get_dma_mr(). - */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created From ccf20562601d2b911787ffa730ad96bb78269a9c Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Aug 2016 11:28:43 +0300 Subject: [PATCH 14/75] IB/core: Expose RSS related capabilities Expose RSS related capabilities, it includes both direct ones (i.e. struct ib_rss_caps) and max_wq_type_rq which may be used in both RSS and non RSS flows. Specifically, supported_qpts: - QP types that support RSS on the device. max_rwq_indirection_tables: - Max number of receive work queue indirection tables that could be opened on the device. max_rwq_indirection_table_size: - Max size of a receive work queue indirection table. max_wq_type_rq: - Max number of work queues of receive type that could be opened on the device. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0a6c70895c8b95..d74c76bf76d33d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -261,6 +261,16 @@ struct ib_odp_caps { } per_transport_caps; }; +struct ib_rss_caps { + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + u32 supported_qpts; + u32 max_rwq_indirection_tables; + u32 max_rwq_indirection_table_size; +}; + enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, @@ -318,6 +328,8 @@ struct ib_device_attr { struct ib_odp_caps odp_caps; uint64_t timestamp_mask; uint64_t hca_core_clock; /* in KHZ */ + struct ib_rss_caps rss_caps; + u32 max_wq_type_rq; }; enum ib_mtu { From 47adf2f4f5805d075359fe6cbe3437612f7d4a34 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Aug 2016 11:28:44 +0300 Subject: [PATCH 15/75] IB/uverbs: Expose RSS related capabilities Query RSS related attributes and return them to user-space via the extended query device uverbs command. It includes both direct ones (i.e. struct ib_uverbs_rss_caps) and max_wq_type_rq which may be used in both RSS and non RSS flows. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 17 +++++++++++++++++ include/uapi/rdma/ib_user_verbs.h | 14 ++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index fe784a90881730..19c1ebf596ad15 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -4173,6 +4173,23 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.device_cap_flags_ex = attr.device_cap_flags; resp.response_length += sizeof(resp.device_cap_flags_ex); + + if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) + goto end; + + resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; + resp.rss_caps.max_rwq_indirection_tables = + attr.rss_caps.max_rwq_indirection_tables; + resp.rss_caps.max_rwq_indirection_table_size = + attr.rss_caps.max_rwq_indirection_table_size; + + resp.response_length += sizeof(resp.rss_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) + goto end; + + resp.max_wq_type_rq = attr.max_wq_type_rq; + resp.response_length += sizeof(resp.max_wq_type_rq); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 7f035f4b53b0f8..a9ebb477dc7733 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -224,6 +224,17 @@ struct ib_uverbs_odp_caps { __u32 reserved; }; +struct ib_uverbs_rss_caps { + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + __u32 supported_qpts; + __u32 max_rwq_indirection_tables; + __u32 max_rwq_indirection_table_size; + __u32 reserved; +}; + struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_query_device_resp base; __u32 comp_mask; @@ -232,6 +243,9 @@ struct ib_uverbs_ex_query_device_resp { __u64 timestamp_mask; __u64 hca_core_clock; /* in KHZ */ __u64 device_cap_flags_ex; + struct ib_uverbs_rss_caps rss_caps; + __u32 max_wq_type_rq; + __u32 reserved; }; struct ib_uverbs_query_port { From 31f69a82b456e3ae9b5572961b4cc4ec971e9dc7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Aug 2016 11:28:45 +0300 Subject: [PATCH 16/75] IB/mlx5: Expose RSS related capabilities Expose RSS related capabilities on both IB and vendor channels. In addition to the IB capabilities the driver reports some extra capabilities on its vendor channel: - Bit mask of the supported types of hash functions. - Bit mask of the supported RX fields that can participate in the RX hashing. Those capabilities are applicable only when the link layer is Ethernet. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 31 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/user.h | 7 +++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index cf1eee492a4e24..840fbd0c9dca4c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -531,6 +531,26 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.tso_caps); } } + + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + resp.rss_caps.rx_hash_function = + MLX5_RX_HASH_FUNC_TOEPLITZ; + resp.rss_caps.rx_hash_fields_mask = + MLX5_RX_HASH_SRC_IPV4 | + MLX5_RX_HASH_DST_IPV4 | + MLX5_RX_HASH_SRC_IPV6 | + MLX5_RX_HASH_DST_IPV6 | + MLX5_RX_HASH_SRC_PORT_TCP | + MLX5_RX_HASH_DST_PORT_TCP | + MLX5_RX_HASH_SRC_PORT_UDP | + MLX5_RX_HASH_DST_PORT_UDP; + resp.response_length += sizeof(resp.rss_caps); + } + } else { + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + resp.response_length += sizeof(resp.tso_caps); + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + resp.response_length += sizeof(resp.rss_caps); } if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { @@ -594,6 +614,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + if (mlx5_ib_port_link_layer(ibdev, 1) == + IB_LINK_LAYER_ETHERNET) { + props->rss_caps.max_rwq_indirection_tables = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); + props->rss_caps.max_rwq_indirection_table_size = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size); + props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; + props->max_wq_type_rq = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 188dac4301b53f..0e49d5b30a4c5a 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -121,10 +121,17 @@ struct mlx5_ib_tso_caps { __u32 supported_qpts; }; +struct mlx5_ib_rss_caps { + __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ + __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ + __u8 reserved[7]; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; struct mlx5_ib_tso_caps tso_caps; + struct mlx5_ib_rss_caps rss_caps; }; struct mlx5_ib_create_cq { From 0680efa21478f7b3e775bf4253c615538cef2ebf Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Sun, 28 Aug 2016 12:25:52 +0300 Subject: [PATCH 17/75] IB/mlx5: Refactor raw packet QP modify function Added a struct for modifying raw QP, this will allow modifying multiple parameters in raw packet QP RQ and can also be used for SQ in the future. Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f3c943f6458e18..7686ee89189b88 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; +struct mlx5_modify_raw_qp_param { + u16 operation; +}; + static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); @@ -1862,7 +1866,7 @@ static void get_cqs(enum ib_qp_type qp_type, } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u16 operation); + const struct mlx5_modify_raw_qp_param *raw_qp_param); static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { @@ -1887,8 +1891,11 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); } else { - err = modify_raw_packet_qp(dev, qp, - MLX5_CMD_OP_2RST_QP); + struct mlx5_modify_raw_qp_param raw_qp_param = { + .operation = MLX5_CMD_OP_2RST_QP + }; + + err = modify_raw_packet_qp(dev, qp, &raw_qp_param); } if (err) mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", @@ -2421,7 +2428,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u16 operation) + const struct mlx5_modify_raw_qp_param *raw_qp_param) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; @@ -2430,7 +2437,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int sq_state; int err; - switch (operation) { + switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; sq_state = MLX5_SQC_STATE_RDY; @@ -2689,11 +2696,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) - err = modify_raw_packet_qp(dev, qp, op); - else + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + struct mlx5_modify_raw_qp_param raw_qp_param = {}; + + raw_qp_param.operation = op; + err = modify_raw_packet_qp(dev, qp, &raw_qp_param); + } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); + } + if (err) goto out; From eb49ab0c5f3d8e5efb696f100978bf966ecf6be3 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Sun, 28 Aug 2016 12:25:53 +0300 Subject: [PATCH 18/75] IB/mlx5: Add port counter support for raw packet QP Counters weren't updated due to raw packet QPs' traffic since the counter-id was not associated with the QP. Added support for associating the q-counter-id with the raw packet QP. The attachment is done only when changing RQ raw packet QP state from RST to INIT in modify-RQ command. FW support is required for the above, without this support raw packet QP counters will not count. Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 40 ++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7686ee89189b88..b4dc71bb36eb30 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,8 +77,15 @@ struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; +enum raw_qp_set_mask_map { + MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, +}; + struct mlx5_modify_raw_qp_param { u16 operation; + + u32 set_mask; /* raw_qp_set_mask_map */ + u8 rq_q_ctr_id; }; static void get_cqs(enum ib_qp_type qp_type, @@ -2369,8 +2376,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask) return result; } -static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev, - struct mlx5_ib_rq *rq, int new_state) +static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param) { void *in; void *rqc; @@ -2387,7 +2395,17 @@ static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev, rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(rqc, rqc, state, new_state); - err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen); + if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) { + if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID); + MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); + } else + pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", + dev->ib_dev.name); + } + + err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen); if (err) goto out; @@ -2454,15 +2472,17 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: - /* Nothing to do here... */ - return 0; + if (raw_qp_param->set_mask) + return -EINVAL; + else + return 0; default: WARN_ON(1); return -EINVAL; } if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state); + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } @@ -2520,6 +2540,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; + struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; int sqd_event; @@ -2660,8 +2681,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; - struct mlx5_ib_port *mibport = &dev->port[port_num]; - + mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->q_cnt_id) << 24); } @@ -2700,6 +2720,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; + raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; + } err = modify_raw_packet_qp(dev, qp, &raw_qp_param); } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, From 7055a29471eebf4b62687944694222635ed44b09 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 28 Aug 2016 14:16:30 +0300 Subject: [PATCH 19/75] IB/mlx5: Fix steering resource leak Fix multicast flow rule leak on adding unicast rule failure. Fixes: 038d2ef87572 ('IB/mlx5: Add flow steering support') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 840fbd0c9dca4c..87988fbc87b411 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1861,6 +1861,7 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de &leftovers_specs[LEFTOVERS_UC].flow_attr, dst); if (IS_ERR(handler_ucast)) { + mlx5_del_flow_rule(handler->rule); kfree(handler); handler = handler_ucast; } else { From 5497adc63213e2b9b27c23e42a33e259d59bfe28 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 28 Aug 2016 14:16:31 +0300 Subject: [PATCH 20/75] IB/mlx5: Save flow table priority handler instead of index Saving the flow table priority object's pointer in the flow handle is necessary for downstream patches since the sniffer flow table isn't placed at the standard flow_db structure but in a different database. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 87988fbc87b411..de7b5b3a4df85b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1654,7 +1654,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) } mlx5_del_flow_rule(handler->rule); - put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + put_flow_table(dev, handler->prio, true); mutex_unlock(&dev->flow_db.lock); kfree(handler); @@ -1776,7 +1776,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, goto free; } - handler->prio = ft_prio - dev->flow_db.prios; + handler->prio = ft_prio; ft_prio->flow_table = ft; free: diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a59034aaa297d4..a24cc20363cbd7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -150,7 +150,7 @@ struct mlx5_ib_flow_prio { struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; - unsigned int prio; + struct mlx5_ib_flow_prio *prio; struct mlx5_flow_rule *rule; }; From dd063d0e6c78b53090a7b3593b0fe40e449b404a Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 28 Aug 2016 14:16:32 +0300 Subject: [PATCH 21/75] IB/mlx5: Fix coverity warning Fix covertiy warning of passing "&flow_attr" to function "create_flow_rule" which uses it as an array. In addition pass flow attributes argument as const. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index de7b5b3a4df85b..dd766d67a2f585 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1437,7 +1437,7 @@ static bool outer_header_zero(u32 *match_criteria) } static int parse_flow_attr(u32 *match_c, u32 *match_v, - union ib_flow_spec *ib_spec) + const union ib_flow_spec *ib_spec) { void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -1605,7 +1605,7 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) is_multicast_ether_addr(eth_spec->val.dst_mac); } -static bool is_valid_attr(struct ib_flow_attr *flow_attr) +static bool is_valid_attr(const struct ib_flow_attr *flow_attr) { union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); bool has_ipv4_spec = false; @@ -1729,13 +1729,13 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, + const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; - void *ib_flow = flow_attr + 1; + const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; u32 action; int err = 0; From d9d4980af21df94f527caeecd35bfa4bafe38c9c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 28 Aug 2016 14:16:33 +0300 Subject: [PATCH 22/75] IB/mlx5: Increase flow table reference count in create rule Move the reference count increasing of flow table to be in create_flow_rule, it will increase the reference count for each rule creation and not for each flow. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index dd766d67a2f585..79f0486f2b1fd1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1776,6 +1776,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, goto free; } + ft_prio->refcount++; handler->prio = ft_prio; ft_prio->flow_table = ft; @@ -1800,6 +1801,7 @@ static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *de flow_attr, dst); if (IS_ERR(handler_dst)) { mlx5_del_flow_rule(handler->rule); + ft_prio->refcount--; kfree(handler); handler = handler_dst; } else { @@ -1862,6 +1864,7 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de dst); if (IS_ERR(handler_ucast)) { mlx5_del_flow_rule(handler->rule); + ft_prio->refcount--; kfree(handler); handler = handler_ucast; } else { @@ -1928,7 +1931,6 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto destroy_ft; } - ft_prio->refcount++; mutex_unlock(&dev->flow_db.lock); kfree(dst); From cc0e5d42351de1f9233738697718d0eed4396536 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 28 Aug 2016 14:16:34 +0300 Subject: [PATCH 23/75] IB/mlx5: Add sniffer support to steering Add support to create sniffer rule. This rule receive all incoming and outgoing packets from the port. A user could create such rule by using IB_FLOW_ATTR_SNIFFER type. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 74 +++++++++++++++++++++++++++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 79f0486f2b1fd1..5e785e2f24c061 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1649,6 +1649,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) list_for_each_entry_safe(iter, tmp, &handler->list, list) { mlx5_del_flow_rule(iter->rule); + put_flow_table(dev, iter->prio, true); list_del(&iter->list); kfree(iter); } @@ -1670,10 +1671,16 @@ static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) return priority; } +enum flow_table_type { + MLX5_IB_FT_RX, + MLX5_IB_FT_TX +}; + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr) + struct ib_flow_attr *flow_attr, + enum flow_table_type ft_type) { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; @@ -1704,6 +1711,19 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, &num_entries, &num_groups); prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + allow_sniffer_and_nic_rx_shared_tir)) + return ERR_PTR(-ENOTSUPP); + + ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); + + prio = &dev->flow_db.sniffer[ft_type]; + priority = 0; + num_entries = 1; + num_groups = 1; } if (!ns) @@ -1875,6 +1895,43 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de return handler; } +static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_rx, + struct mlx5_ib_flow_prio *ft_tx, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_rx; + struct mlx5_ib_flow_handler *handler_tx; + int err; + static const struct ib_flow_attr flow_attr = { + .num_of_specs = 0, + .size = sizeof(flow_attr) + }; + + handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); + if (IS_ERR(handler_rx)) { + err = PTR_ERR(handler_rx); + goto err; + } + + handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); + if (IS_ERR(handler_tx)) { + err = PTR_ERR(handler_tx); + goto err_tx; + } + + list_add(&handler_tx->list, &handler_rx->list); + + return handler_rx; + +err_tx: + mlx5_del_flow_rule(handler_rx->rule); + ft_rx->refcount--; + kfree(handler_rx); +err: + return ERR_PTR(err); +} + static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) @@ -1882,6 +1939,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; @@ -1899,11 +1957,19 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_lock(&dev->flow_db.lock); - ft_prio = get_flow_table(dev, flow_attr); + ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; } + if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); + if (IS_ERR(ft_prio_tx)) { + err = PTR_ERR(ft_prio_tx); + ft_prio_tx = NULL; + goto destroy_ft; + } + } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; @@ -1920,6 +1986,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); } else { err = -EINVAL; goto destroy_ft; @@ -1938,6 +2006,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, destroy_ft: put_flow_table(dev, ft_prio, false); + if (ft_prio_tx) + put_flow_table(dev, ft_prio_tx, false); unlock: mutex_unlock(&dev->flow_db.lock); kfree(dst); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a24cc20363cbd7..e1b120574792a7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -142,6 +142,7 @@ struct mlx5_ib_pd { #define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) +#define MLX5_IB_NUM_SNIFFER_FTS 2 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; @@ -156,6 +157,7 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; /* Protect flow steering bypass flow tables * when add/del flow rules. * only single add/removal of flow steering rule could be done From 1f02a09c38043f1c003ddbd144b6da7f269d0ca1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:30 +0300 Subject: [PATCH 24/75] IB/mlx4: Add validation to flow specifications parsing Add validation check that all set fields in flow specification are supported by vendor. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index a96e78c2c75ac3..f617c23f0be751 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1361,6 +1361,19 @@ struct mlx4_ib_steering { union ib_gid gid; }; +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD dst_ip +#define LAST_TCP_UDP_FIELD src_port + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + static int parse_flow_attr(struct mlx4_dev *dev, u32 qp_num, union ib_flow_spec *ib_spec, @@ -1370,6 +1383,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, ETH_ALEN); @@ -1379,6 +1395,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; break; case IB_FLOW_SPEC_IB: + if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_IB; mlx4_spec->ib.l3_qpn = cpu_to_be32(qp_num); @@ -1388,6 +1407,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, case IB_FLOW_SPEC_IPV4: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_IPV4; mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; @@ -1397,6 +1419,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; + type = ib_spec->type == IB_FLOW_SPEC_TCP ? MLX4_NET_TRANS_RULE_ID_TCP : MLX4_NET_TRANS_RULE_ID_UDP; From c47ac6aee6df4dc17460f56dd66af0a271e8392c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:31 +0300 Subject: [PATCH 25/75] IB/mlx5: Add validation to flow specifications parsing Add validation check that all set fields in flow specification are supported by vendor. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 36 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5e785e2f24c061..e6fdc82912baac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1436,6 +1436,20 @@ static bool outer_header_zero(u32 *match_criteria) size - 1); } +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD dst_ip +#define LAST_IPV6_FIELD dst_ip +#define LAST_TCP_UDP_FIELD src_port + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + static int parse_flow_attr(u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec) { @@ -1445,8 +1459,8 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, outer_headers); switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: - if (ib_spec->size != sizeof(ib_spec->eth)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -ENOTSUPP; ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), @@ -1486,8 +1500,8 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: - if (ib_spec->size != sizeof(ib_spec->ipv4)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); @@ -1512,8 +1526,8 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, sizeof(ib_spec->ipv4.val.dst_ip)); break; case IB_FLOW_SPEC_IPV6: - if (ib_spec->size != sizeof(ib_spec->ipv6)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); @@ -1538,8 +1552,9 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, sizeof(ib_spec->ipv6.val.dst_ip)); break; case IB_FLOW_SPEC_TCP: - if (ib_spec->size != sizeof(ib_spec->tcp_udp)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); @@ -1557,8 +1572,9 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: - if (ib_spec->size != sizeof(ib_spec->tcp_udp)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); From 15dfbd6b4f058571f41be5b7917465dab2ff55da Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:32 +0300 Subject: [PATCH 26/75] IB/uverbs: Add support to extend flow steering specifications Flow steering specifications structures were implemented as in an extensible way that allows one to add new filters and new fields to existing filters. These specifications have never been extended, therefore the kernel flow specifications size and the user flow specifications size were must to be equal. In downstream patch, the IPv4 flow specifications type is extended to support TOS and TTL fields. To support an extension we change the flow specifications size condition test to be as following: * If the user flow specifications is bigger than the kernel specifications, we verify that all the bits which not in the kernel specifications are zeros and the flow is added only with the kernel specifications fields. * Otherwise, we add flow rule only with the user specifications fields. User space filters must be aligned with 32bits. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 98 +++++++++++++++++++++------- include/rdma/ib_verbs.h | 10 +++ 2 files changed, 83 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 19c1ebf596ad15..b9fb256c25fad1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3078,51 +3078,98 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, return ret ? ret : in_len; } +static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) +{ + /* Returns user space filter size, includes padding */ + return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; +} + +static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, + u16 ib_real_filter_sz) +{ + /* + * User space filter structures must be 64 bit aligned, otherwise this + * may pass, but we won't handle additional new attributes. + */ + + if (kern_filter_size > ib_real_filter_sz) { + if (memchr_inv(kern_spec_filter + + ib_real_filter_sz, 0, + kern_filter_size - ib_real_filter_sz)) + return -EINVAL; + return ib_real_filter_sz; + } + return kern_filter_size; +} + static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { + ssize_t actual_filter_sz; + ssize_t kern_filter_sz; + ssize_t ib_filter_sz; + void *kern_spec_mask; + void *kern_spec_val; + if (kern_spec->reserved) return -EINVAL; ib_spec->type = kern_spec->type; + kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + /* User flow spec size must be aligned to 4 bytes */ + if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) + return -EINVAL; + + kern_spec_val = (void *)kern_spec + + sizeof(struct ib_uverbs_flow_spec_hdr); + kern_spec_mask = kern_spec_val + kern_filter_sz; + switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: - ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); - if (ib_spec->eth.size != kern_spec->eth.size) + ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->eth.val, &kern_spec->eth.val, - sizeof(struct ib_flow_eth_filter)); - memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, - sizeof(struct ib_flow_eth_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_eth); + memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV4: - ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); - if (ib_spec->ipv4.size != kern_spec->ipv4.size) + ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, - sizeof(struct ib_flow_ipv4_filter)); - memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, - sizeof(struct ib_flow_ipv4_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_ipv4); + memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV6: - ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); - if (ib_spec->ipv6.size != kern_spec->ipv6.size) + ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, - sizeof(struct ib_flow_ipv6_filter)); - memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, - sizeof(struct ib_flow_ipv6_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_ipv6); + memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: - ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); - if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, - sizeof(struct ib_flow_tcp_udp_filter)); - memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, - sizeof(struct ib_flow_tcp_udp_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); + memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; default: return -EINVAL; @@ -3654,7 +3701,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_uobj; } - flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL); + flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * + sizeof(union ib_flow_spec), GFP_KERNEL); if (!flow_attr) { err = -ENOMEM; goto err_put; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d74c76bf76d33d..4570d8844f63b1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1623,6 +1623,8 @@ struct ib_flow_eth_filter { u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_eth { @@ -1635,6 +1637,8 @@ struct ib_flow_spec_eth { struct ib_flow_ib_filter { __be16 dlid; __u8 sl; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ib { @@ -1647,6 +1651,8 @@ struct ib_flow_spec_ib { struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ipv4 { @@ -1659,6 +1665,8 @@ struct ib_flow_spec_ipv4 { struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ipv6 { @@ -1671,6 +1679,8 @@ struct ib_flow_spec_ipv6 { struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_tcp_udp { From 989a3a8f91ba02f71b84ecde3b948388d8bf2200 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:33 +0300 Subject: [PATCH 27/75] IB/uverbs: Add more fields to IPv4 flow specification Add the following fields to IPv4 flow filter specification: 1. Type of Service 2. Time to Live 3. Flags 4. Protocol Signed-off-by: Maor Gottlieb Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 11 +++++++++++ include/uapi/rdma/ib_user_verbs.h | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4570d8844f63b1..990220b757f0c7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1648,9 +1648,20 @@ struct ib_flow_spec_ib { struct ib_flow_ib_filter mask; }; +/* IPv4 header flags */ +enum ib_ipv4_flags { + IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */ + IB_IPV4_MORE_FRAG = 0X4 /* For All fragmented packets except the + last have this flag set */ +}; + struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; + u8 proto; + u8 tos; + u8 ttl; + u8 flags; /* Must be last */ u8 real_sz[0]; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index a9ebb477dc7733..804f086835c6da 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -848,6 +848,10 @@ struct ib_uverbs_flow_spec_eth { struct ib_uverbs_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; + __u8 proto; + __u8 tos; + __u8 ttl; + __u8 flags; }; struct ib_uverbs_flow_spec_ipv4 { From a72c6a2b0e699fcbcf679b881d5af2683228ae98 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:34 +0300 Subject: [PATCH 28/75] IB/core: Add more fields to IPv6 flow specification Add the following fields to IPv6 flow filter specification: 1. Traffic Class 2. Flow Label 3. Next Header 4. Hop Limit Signed-off-by: Maor Gottlieb Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 4 ++++ include/rdma/ib_verbs.h | 4 ++++ include/uapi/rdma/ib_user_verbs.h | 9 +++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b9fb256c25fad1..cb3f515a2285df 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3158,6 +3158,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, ib_spec->size = sizeof(struct ib_flow_spec_ipv6); memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) || + (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20)) + return -EINVAL; break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 990220b757f0c7..0cec4da51eb7ea 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1676,6 +1676,10 @@ struct ib_flow_spec_ipv4 { struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; + __be32 flow_label; + u8 next_hdr; + u8 traffic_class; + u8 hop_limit; /* Must be last */ u8 real_sz[0]; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 804f086835c6da..25225ebbc7d525 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -886,8 +886,13 @@ struct ib_uverbs_flow_spec_tcp_udp { }; struct ib_uverbs_flow_ipv6_filter { - __u8 src_ip[16]; - __u8 dst_ip[16]; + __u8 src_ip[16]; + __u8 dst_ip[16]; + __be32 flow_label; + __u8 next_hdr; + __u8 traffic_class; + __u8 hop_limit; + __u8 reserved; }; struct ib_uverbs_flow_spec_ipv6 { From ca0d47538528be18cbf45e3cce09862ddf37a3cf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:35 +0300 Subject: [PATCH 29/75] IB/mlx5: Add support in TOS and protocol to flow steering Add support to receive TOS or specific IPv4 protocol. Signed-off-by: Maor Gottlieb Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e6fdc82912baac..fbc0e42ef45093 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1436,9 +1436,23 @@ static bool outer_header_zero(u32 *match_criteria) size - 1); } +static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); +} + +static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); +} + #define LAST_ETH_FIELD vlan_tag #define LAST_IB_FIELD sl -#define LAST_IPV4_FIELD dst_ip +#define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD dst_ip #define LAST_TCP_UDP_FIELD src_port @@ -1524,6 +1538,12 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); + + set_tos(outer_headers_c, outer_headers_v, + ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); + + set_proto(outer_headers_c, outer_headers_v, + ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) From 466fa6d2e36408f697f9ff766f82003ef424bad1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:36 +0300 Subject: [PATCH 30/75] IB/mlx5: Add support of more IPv6 fields to flow steering Add support to receive Traffic Class, specific IPv6 protocol or IPv6 flow label. Signed-off-by: Maor Gottlieb Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 56 +++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fbc0e42ef45093..98844c1db55583 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1425,15 +1425,31 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) return 0; } -static bool outer_header_zero(u32 *match_criteria) +enum { + MATCH_CRITERIA_ENABLE_OUTER_BIT, + MATCH_CRITERIA_ENABLE_MISC_BIT, + MATCH_CRITERIA_ENABLE_INNER_BIT +}; + +#define HEADER_IS_ZERO(match_criteria, headers) \ + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ + +static u8 get_match_criteria_enable(u32 *match_criteria) { - int size = MLX5_ST_SZ_BYTES(fte_match_param); - char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers); + u8 match_criteria_enable; + + match_criteria_enable = + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << + MATCH_CRITERIA_ENABLE_OUTER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << + MATCH_CRITERIA_ENABLE_MISC_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << + MATCH_CRITERIA_ENABLE_INNER_BIT; - return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, - outer_headers_c + 1, - size - 1); + return match_criteria_enable; } static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) @@ -1453,7 +1469,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_ETH_FIELD vlan_tag #define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos -#define LAST_IPV6_FIELD dst_ip +#define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port /* Field is the last supported field */ @@ -1471,6 +1487,11 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, outer_headers); void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters); + switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) @@ -1570,6 +1591,21 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); + + set_tos(outer_headers_c, outer_headers_v, + ib_spec->ipv6.mask.traffic_class, + ib_spec->ipv6.val.traffic_class); + + set_proto(outer_headers_c, outer_headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr); + + MLX5_SET(fte_match_set_misc, misc_params_c, + outer_ipv6_flow_label, + ntohl(ib_spec->ipv6.mask.flow_label)); + MLX5_SET(fte_match_set_misc, misc_params_v, + outer_ipv6_flow_label, + ntohl(ib_spec->ipv6.val.flow_label)); break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, @@ -1817,9 +1853,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } - /* Outer header support only */ - spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)) - << 0; + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, spec, From 350d0e4c7e4b03ed5646ac39ba4aac98bb3d9c56 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Aug 2016 14:58:18 +0300 Subject: [PATCH 31/75] IB/mlx5: Track asynchronous events on a receive work queue Track asynchronous events on a receive work queue by using the mlx5_core_create_rq_tracked API. In case a fatal error has occurred letting the IB layer know about by using the ib_wq event handler. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +++++- drivers/infiniband/hw/mlx5/qp.c | 34 ++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e1b120574792a7..f8a62a643613a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -227,7 +227,7 @@ struct mlx5_ib_wq { struct mlx5_ib_rwq { struct ib_wq ibwq; - u32 rqn; + struct mlx5_core_qp core_qp; u32 rq_num_pas; u32 log_rq_stride; u32 log_rq_size; @@ -664,6 +664,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } +static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) +{ + return container_of(core_qp, struct mlx5_ib_rwq, core_qp); +} + static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { return container_of(mmkey, struct mlx5_ib_mr, mmkey); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b4dc71bb36eb30..cf181ef5ed8efe 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4536,6 +4536,28 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) return 0; } +static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) +{ + struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp); + struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device); + struct ib_event event; + + if (rwq->ibwq.event_handler) { + event.device = rwq->ibwq.device; + event.element.wq = &rwq->ibwq; + switch (type) { + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_WQ_FATAL; + break; + default: + mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn); + return; + } + + rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context); + } +} + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@ -4573,7 +4595,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); - err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn); + err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); kvfree(in); return err; } @@ -4689,7 +4711,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - rwq->ibwq.wq_num = rwq->rqn; + rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + @@ -4699,10 +4721,12 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, goto err_copy; } + rwq->core_qp.event = mlx5_ib_wq_event; + rwq->ibwq.event_handler = init_attr->event_handler; return &rwq->ibwq; err_copy: - mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: destroy_user_rq(pd, rwq); err: @@ -4715,7 +4739,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq) struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); - mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); destroy_user_rq(wq->pd, rwq); kfree(rwq); @@ -4847,7 +4871,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); MLX5_SET(rqc, rqc, state, wq_state); - err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen); + err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); kvfree(in); if (!err) rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; From 5ec8c83e3ad3ea4ea78798edcd4ad61e0041a174 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 18 Sep 2016 20:48:00 +0300 Subject: [PATCH 32/75] IB/mlx5: Port events in RoCE now rely on netdev events Since ib_query_port() in RoCE returns the state of its netdev as the port state, it makes sense to propagate the port up/down events to ib_core when the netdev port state changes, instead of relying on traditional core events. This also keeps both the event and ib_query_port() synchronized. Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 66 ++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 98844c1db55583..ed038b7a96fc2d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -107,13 +107,32 @@ static int mlx5_netdev_event(struct notifier_block *this, struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, roce.nb); - if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER)) - return NOTIFY_DONE; + switch (event) { + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + write_lock(&ibdev->roce.netdev_lock); + if (ndev->dev.parent == &ibdev->mdev->pdev->dev) + ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? + NULL : ndev; + write_unlock(&ibdev->roce.netdev_lock); + break; - write_lock(&ibdev->roce.netdev_lock); - if (ndev->dev.parent == &ibdev->mdev->pdev->dev) - ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; - write_unlock(&ibdev->roce.netdev_lock); + case NETDEV_UP: + case NETDEV_DOWN: + if (ndev == ibdev->roce.netdev && ibdev->ib_active) { + struct ib_event ibev = {0}; + + ibev.device = &ibdev->ib_dev; + ibev.event = (event == NETDEV_UP) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + ibev.element.port_num = 1; + ib_dispatch_event(&ibev); + } + break; + + default: + break; + } return NOTIFY_DONE; } @@ -2267,14 +2286,19 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, break; case MLX5_DEV_EVENT_PORT_UP: - ibev.event = IB_EVENT_PORT_ACTIVE; - port = (u8)param; - break; - case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: - ibev.event = IB_EVENT_PORT_ERR; port = (u8)param; + + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return; + + ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; break; case MLX5_DEV_EVENT_LID_CHANGE: @@ -2679,14 +2703,24 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str, fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } +static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) +{ + if (dev->roce.nb.notifier_call) { + unregister_netdevice_notifier(&dev->roce.nb); + dev->roce.nb.notifier_call = NULL; + } +} + static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; dev->roce.nb.notifier_call = mlx5_netdev_event; err = register_netdevice_notifier(&dev->roce.nb); - if (err) + if (err) { + dev->roce.nb.notifier_call = NULL; return err; + } err = mlx5_nic_vport_enable_roce(dev->mdev); if (err) @@ -2695,14 +2729,13 @@ static int mlx5_enable_roce(struct mlx5_ib_dev *dev) return 0; err_unregister_netdevice_notifier: - unregister_netdevice_notifier(&dev->roce.nb); + mlx5_remove_roce_notifier(dev); return err; } static void mlx5_disable_roce(struct mlx5_ib_dev *dev) { mlx5_nic_vport_disable_roce(dev->mdev); - unregister_netdevice_notifier(&dev->roce.nb); } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) @@ -3051,8 +3084,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) destroy_dev_resources(&dev->devr); err_disable_roce: - if (ll == IB_LINK_LAYER_ETHERNET) + if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_roce(dev); + mlx5_remove_roce_notifier(dev); + } err_free_port: kfree(dev->port); @@ -3068,6 +3103,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + mlx5_remove_roce_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); From 9ef9c640f4c5376189d8ca70f51a50a3d0b16914 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 18 Sep 2016 20:48:01 +0300 Subject: [PATCH 33/75] IB/mlx5: Merge vports flow steering during LAG This is done in two steps: 1) Issuing CREATE_VPORT_LAG in order to have Ethernet traffic from both ports arriving on PF0 root flowtable, so we will be able to catch all raw-eth traffic on PF0. 2) Creation of LAG demux flowtable in order to direct all non-raw-eth traffic back to its source port, assuring that normal Ethernet traffic "jumps" to the root flowtable of its RX port (non-LAG behavior). Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 49 ++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 50 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ed038b7a96fc2d..70e7c8dcd4e0da 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2703,6 +2703,47 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str, fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } +static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, + MLX5_FLOW_NAMESPACE_LAG); + struct mlx5_flow_table *ft; + int err; + + if (!ns || !mlx5_lag_is_active(mdev)) + return 0; + + err = mlx5_cmd_create_vport_lag(mdev); + if (err) + return err; + + ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_destroy_vport_lag; + } + + dev->flow_db.lag_demux_ft = ft; + return 0; + +err_destroy_vport_lag: + mlx5_cmd_destroy_vport_lag(mdev); + return err; +} + +static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + if (dev->flow_db.lag_demux_ft) { + mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft); + dev->flow_db.lag_demux_ft = NULL; + + mlx5_cmd_destroy_vport_lag(mdev); + } +} + static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) { if (dev->roce.nb.notifier_call) { @@ -2726,8 +2767,15 @@ static int mlx5_enable_roce(struct mlx5_ib_dev *dev) if (err) goto err_unregister_netdevice_notifier; + err = mlx5_roce_lag_init(dev); + if (err) + goto err_disable_roce; + return 0; +err_disable_roce: + mlx5_nic_vport_disable_roce(dev->mdev); + err_unregister_netdevice_notifier: mlx5_remove_roce_notifier(dev); return err; @@ -2735,6 +2783,7 @@ static int mlx5_enable_roce(struct mlx5_ib_dev *dev) static void mlx5_disable_roce(struct mlx5_ib_dev *dev) { + mlx5_roce_lag_cleanup(dev); mlx5_nic_vport_disable_roce(dev->mdev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f8a62a643613a6..53e1f1dc000062 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -158,6 +158,7 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; + struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. * only single add/removal of flow steering rule could be done From 88621dfe90677a6a07ae1aff9feb1773226bc8b0 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 18 Sep 2016 20:48:02 +0300 Subject: [PATCH 34/75] IB/mlx5: Port status track LAG master, when LAG is active When LAG is active, port up/down events should be triggered by tracking the LAG master, and not one of the two slave netdevs. In the same manner, ib_query_port() should return the details of the LAG master. Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 70e7c8dcd4e0da..00a3616f395c6c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -118,8 +118,17 @@ static int mlx5_netdev_event(struct notifier_block *this, break; case NETDEV_UP: - case NETDEV_DOWN: - if (ndev == ibdev->roce.netdev && ibdev->ib_active) { + case NETDEV_DOWN: { + struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + struct net_device *upper = NULL; + + if (lag_ndev) { + upper = netdev_master_upper_dev_get(lag_ndev); + dev_put(lag_ndev); + } + + if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) + && ibdev->ib_active) { struct ib_event ibev = {0}; ibev.device = &ibdev->ib_dev; @@ -129,6 +138,7 @@ static int mlx5_netdev_event(struct notifier_block *this, ib_dispatch_event(&ibev); } break; + } default: break; @@ -143,6 +153,10 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; + ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + if (ndev) + return ndev; + /* Ensure ndev does not disappear before we invoke dev_hold() */ read_lock(&ibdev->roce.netdev_lock); @@ -158,7 +172,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); - struct net_device *ndev; + struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; u16 qkey_viol_cntr; @@ -182,6 +196,17 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (!ndev) return 0; + if (mlx5_lag_is_active(dev->mdev)) { + rcu_read_lock(); + upper = netdev_master_upper_dev_get_rcu(ndev); + if (upper) { + dev_put(ndev); + ndev = upper; + dev_hold(ndev); + } + rcu_read_unlock(); + } + if (netif_running(ndev) && netif_carrier_ok(ndev)) { props->state = IB_PORT_ACTIVE; props->phys_state = 5; From 4babcf97c551dc57a9b7149ef1f72511b5e97332 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 18 Sep 2016 20:48:03 +0300 Subject: [PATCH 35/75] IB/mlx5: Set unique device name on LAG IB bond device name is now 'mlx5_bond_X', instead of 'mlx5_X'. Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 00a3616f395c6c..24f5f48e9ddf36 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2924,6 +2924,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) struct mlx5_ib_dev *dev; enum rdma_link_layer ll; int port_type_cap; + const char *name; int err; int i; @@ -2956,7 +2957,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); - strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); + if (!mlx5_lag_is_active(mdev)) + name = "mlx5_%d"; + else + name = "mlx5_bond_%d"; + + strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; From 13eab21f92de21f324fd6afe1aeca310446b8731 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 18 Sep 2016 20:48:04 +0300 Subject: [PATCH 36/75] IB/mlx5: LAG QP load balancing When LAG is active, QP tx affinity (the physical port to which a QP is affined, or the TIS in case of raw-eth) is set in a round robin fashion during state transition from RESET to INIT. Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 63 +++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 53e1f1dc000062..40fe1a65402b13 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -605,6 +605,7 @@ struct mlx5_roce { rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; + atomic_t next_port; }; struct mlx5_ib_dev { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index cf181ef5ed8efe..2ec88c649ac035 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1873,7 +1873,8 @@ static void get_cqs(enum ib_qp_type qp_type, } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct mlx5_modify_raw_qp_param *raw_qp_param); + const struct mlx5_modify_raw_qp_param *raw_qp_param, + u8 lag_tx_affinity); static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { @@ -1902,7 +1903,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) .operation = MLX5_CMD_OP_2RST_QP }; - err = modify_raw_packet_qp(dev, qp, &raw_qp_param); + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0); } if (err) mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", @@ -2166,6 +2167,31 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, return err; } +static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, + struct mlx5_ib_sq *sq, u8 tx_affinity) +{ + void *in; + void *tisc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tis_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); + + tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); + MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); + + err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); + + kvfree(in); + + return err; +} + static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, @@ -2446,7 +2472,8 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct mlx5_modify_raw_qp_param *raw_qp_param) + const struct mlx5_modify_raw_qp_param *raw_qp_param, + u8 tx_affinity) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; @@ -2487,8 +2514,16 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - if (qp->sq.wqe_cnt) + if (qp->sq.wqe_cnt) { + if (tx_affinity) { + err = modify_raw_packet_tx_affinity(dev->mdev, sq, + tx_affinity); + if (err) + return err; + } + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + } return 0; } @@ -2547,6 +2582,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int mlx5_st; int err; u16 op; + u8 tx_affinity = 0; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -2576,6 +2612,23 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } + if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { + if ((ibqp->qp_type == IB_QPT_RC) || + (ibqp->qp_type == IB_QPT_UD && + !(qp->flags & MLX5_IB_QP_SQPN_QP1)) || + (ibqp->qp_type == IB_QPT_UC) || + (ibqp->qp_type == IB_QPT_RAW_PACKET) || + (ibqp->qp_type == IB_QPT_XRC_INI) || + (ibqp->qp_type == IB_QPT_XRC_TGT)) { + if (mlx5_lag_is_active(dev->mdev)) { + tx_affinity = (unsigned int)atomic_add_return(1, + &dev->roce.next_port) % + MLX5_MAX_PORTS + 1; + context->flags |= cpu_to_be32(tx_affinity << 24); + } + } + } + if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD || @@ -2724,7 +2777,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } - err = modify_raw_packet_qp(dev, qp, &raw_qp_param); + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); From 4534d859020190c8fa04d899ddca656191339de1 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:27:46 +0530 Subject: [PATCH 37/75] IB/sa : Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "ib_nl" queues work item &ib_nl_timed_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index b9bf7aa055e76f..81b742ca163911 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2015,7 +2015,7 @@ int ib_sa_init(void) goto err2; } - ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq"); + ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM); if (!ib_nl_wq) { ret = -ENOMEM; goto err3; From 1c99e299ba62f72b24e8ec2f4436f574d7433f11 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:28:07 +0530 Subject: [PATCH 38/75] IB/mad: Remove deprecated create_singlethread_workqueue The workqueue "ib_nl" queues work items &ib_nl_timed_work and &mad_agent_priv->local_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 95d33a3572e6d2..40cbd6bdb73be1 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3177,7 +3177,7 @@ static int ib_mad_port_open(struct ib_device *device, goto error7; snprintf(name, sizeof name, "ib_mad%d", port_num); - port_priv->wq = create_singlethread_workqueue(name); + port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!port_priv->wq) { ret = -ENOMEM; goto error8; From 01013cdf06d930dbc293d001ccf509200e403057 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:28:36 +0530 Subject: [PATCH 39/75] IB/multicast: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "mcast_wq" queues work item &group->work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 3a3c5d73bbfc83..49ecde98a3d90a 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -882,7 +882,7 @@ int mcast_init(void) { int ret; - mcast_wq = create_singlethread_workqueue("ib_mcast"); + mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); if (!mcast_wq) return -ENOMEM; From a190d3b07c9f8046784681fa6167005711ed3f5e Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:29:10 +0530 Subject: [PATCH 40/75] IB/ucma: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "close_wq" queues work items &ctx->close_work (maps to ucma_close_id) and &con_req_eve->close_work (maps to ucma_close_event_id). It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2825ece91d3c56..9520154f1d7c96 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1638,7 +1638,8 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; - file->close_wq = create_singlethread_workqueue("ucma_close_id"); + file->close_wq = alloc_ordered_workqueue("ucma_close_id", + WQ_MEM_RECLAIM); if (!file->close_wq) { kfree(file); return -ENOMEM; From dee9acbb3254f67f7b0ce0766f13e25c5618ae78 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:29:35 +0530 Subject: [PATCH 41/75] IB/cma: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "cma_wq" queues work item cma_work_handler. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e6dfa1bd3defae..8954792f1acc63 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4363,7 +4363,7 @@ static int __init cma_init(void) { int ret; - cma_wq = create_singlethread_workqueue("rdma_cm"); + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; From f54816261c2b36e59e43f02d654ac5834a71537d Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:30:32 +0530 Subject: [PATCH 42/75] IB/addr: Remove deprecated create_singlethread_workqueue The workqueue "addr_wq" queues a single work item &work and hence doesn't require ordering. Also, it is being used on a memory reclaim path. Hence, it has been converted to use alloc_workqueue with WQ_MEM_RECLAIM set. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1374541a45287f..b136d3acc5bde6 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -800,7 +800,7 @@ static struct notifier_block nb = { int addr_init(void) { - addr_wq = create_singlethread_workqueue("ib_addr"); + addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0); if (!addr_wq) return -ENOMEM; From daf0879f4cb9b413d8f82404350cf956ce3e458b Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:37:44 +0530 Subject: [PATCH 43/75] IB/iwcm: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "iwcm_wq" queues work item &work(maps to cm_work_handler). It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 357624f8b9d31d..5495e22839a7eb 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1160,7 +1160,7 @@ static int __init iw_cm_init(void) if (ret) pr_err("iw_cm: couldn't register netlink callbacks\n"); - iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) return -ENOMEM; From b41ff7cdb50a67974cdb62b1a8b500851bad101c Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:38:20 +0530 Subject: [PATCH 44/75] iw_cxgb3: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "workq" queues work item &skb_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 04bbf172abde10..65ee64400deb91 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -2258,7 +2258,7 @@ int __init iwch_cm_init(void) { skb_queue_head_init(&rxq); - workq = create_singlethread_workqueue("iw_cxgb3"); + workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); if (!workq) return -ENOMEM; From b59114bbca195f381a3b10b8172ee9b0511ce7de Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:38:47 +0530 Subject: [PATCH 45/75] IB/qib: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "qib" queues work item &priv->s_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index f253111e682ea7..1730aa839a471f 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -614,8 +614,8 @@ static int qib_create_workqueues(struct qib_devdata *dd) snprintf(wq_name, sizeof(wq_name), "qib%d_%d", dd->unit, pidx); - ppd->qib_wq = - create_singlethread_workqueue(wq_name); + ppd->qib_wq = alloc_ordered_workqueue(wq_name, + WQ_MEM_RECLAIM); if (!ppd->qib_wq) goto wq_error; } From 52ee1a05d2724b71327d53b5fc152f1344ff2b3f Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:39:14 +0530 Subject: [PATCH 46/75] iw_cxgb4: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "workq" queues work item &skb_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3aca7f6171b428..c9661d8f11dcc3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -4357,7 +4357,7 @@ int __init c4iw_cm_init(void) spin_lock_init(&timeout_lock); skb_queue_head_init(&rxq); - workq = create_singlethread_workqueue("iw_cxgb4"); + workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM); if (!workq) return -ENOMEM; From 401f047e1b10bdcea8d351c52366b94688c271fb Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:39:37 +0530 Subject: [PATCH 47/75] IB/mthca: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "catas_wq" in triggering a device remove and causing a device reset when a catastrophic error occurs. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_catas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 712d2a30fbe59d..f6474c24f193c4 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -187,7 +187,7 @@ int __init mthca_catas_init(void) { INIT_WORK(&catas_work, catas_reset); - catas_wq = create_singlethread_workqueue("mthca_catas"); + catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM); if (!catas_wq) return -ENOMEM; From 73b976954360b4b37570ab001e85d8dde0c345b7 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:40:09 +0530 Subject: [PATCH 48/75] i40iw_main: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "virtchnl_wq" queues work items i40iw_cqp_generic_worker and i40iw_cqp_manage_hmc_fcn_worker. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 6e9081380a276c..798335fa3105e2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1613,7 +1613,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc); if (status) break; - iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch"); + iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM); i40iw_register_notifiers(); iwdev->init_state = INET_NOTIFIER; status = i40iw_add_mac_ip(iwdev); From 5e9ff9b0bf0a20e87c75506604a1233f6c37f335 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:40:53 +0530 Subject: [PATCH 49/75] i40iw_cm: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "event_wq" is involved in event handling and queues i40iw_cm_event_handler. The workqueue "disconn_wq" is involved in closing connection and queues i40iw_disconnect_worker. Both workqueues have been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5026dc79978a7c..c490f8d4986441 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3166,8 +3166,11 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); - cm_core->event_wq = create_singlethread_workqueue("iwewq"); - cm_core->disconn_wq = create_singlethread_workqueue("iwdwq"); + cm_core->event_wq = alloc_ordered_workqueue("iwewq", + WQ_MEM_RECLAIM); + + cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq", + WQ_MEM_RECLAIM); } /** From 3c856c82adc2cd4229ea91a82c00e91df5d44ddc Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:41:18 +0530 Subject: [PATCH 50/75] IB/mlx5: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "cache->wq" queues work items &ent->work (maps to cache_work_func) and &ent->dwork(maps to delayed_cache_work_func). It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Leon Romanovsky Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6f7e34753abcae..873d81dbde41fe 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -611,7 +611,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int err; int i; - cache->wq = create_singlethread_workqueue("mkey_cache"); + cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); return -ENOMEM; From 72a36d114187caaf434ebd5764b5f4840236ddb3 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:42:19 +0530 Subject: [PATCH 51/75] IB/mlx5/odp: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "mlx5_ib_page_fault_wq" queues work item &qp_pfault->work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 34e79e709c67ba..cacb631a7b0a98 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -782,8 +782,8 @@ void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) int __init mlx5_ib_odp_init(void) { - mlx5_ib_page_fault_wq = - create_singlethread_workqueue("mlx5_ib_page_faults"); + mlx5_ib_page_fault_wq = alloc_ordered_workqueue("mlx5_ib_page_faults", + WQ_MEM_RECLAIM); if (!mlx5_ib_page_fault_wq) return -ENOMEM; From 41cd3944057a5a1e5ff56a92a935576bff618b17 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:42:48 +0530 Subject: [PATCH 52/75] IB/mlx4: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "wq" queues work items &dm[i]->work, &ew->work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f617c23f0be751..f07cbf87db5216 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3244,7 +3244,7 @@ static int __init mlx4_ib_init(void) { int err; - wq = create_singlethread_workqueue("mlx4_ib"); + wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM); if (!wq) return -ENOMEM; From 90b14b3237ba58d4cc8f523e225e6a22f8e4927c Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:43:10 +0530 Subject: [PATCH 53/75] IB/mlx4/mad: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "wq" queues work item &ctx->work and the workqueue "ud_wq" queues work item &dm[i]->work. Both the workqueues have been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 517346f8ed7581..9b4b21bd0222d6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -2070,7 +2070,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, } snprintf(name, sizeof name, "mlx4_ibt%d", port); - ctx->wq = create_singlethread_workqueue(name); + ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); ret = -ENOMEM; @@ -2078,7 +2078,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, } snprintf(name, sizeof name, "mlx4_ibud%d", port); - ctx->ud_wq = create_singlethread_workqueue(name); + ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); ret = -ENOMEM; From fcf621dd2ba2950a3a6717fc0a3c1f908107a546 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:43:35 +0530 Subject: [PATCH 54/75] IB/mlx4/mcg: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "mcg_wq" queues work items &group->work and &group->timeout_work. The workqueue "clean_wq" queues work item mcg_clean_task. Both have been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 8f7ad07915b07e..7d30be0f287bb2 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1045,7 +1045,7 @@ int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx) atomic_set(&ctx->tid, 0); sprintf(name, "mlx4_ib_mcg%d", ctx->port); - ctx->mcg_wq = create_singlethread_workqueue(name); + ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->mcg_wq) return -ENOMEM; @@ -1246,7 +1246,7 @@ void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave) int mlx4_ib_mcg_init(void) { - clean_wq = create_singlethread_workqueue("mlx4_ib_mcg"); + clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM); if (!clean_wq) return -ENOMEM; From 9e5df3125c9d88d402dcb97abdc6b7dc6a6c8841 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:44:03 +0530 Subject: [PATCH 55/75] IB/nes: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() replaces deprecated create_singlethread_workqueue(). The workqueue "event_wq" queues work item &event->event_work and the workqueue "disconn_wq" queues work item work (maps to g_cm_core->disconn_wq). WQ_MEM_RECLAIM has not been set since the workqueues are not being used on a memory reclaim path. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 7f0aa23aef9dad..57db9b332f446f 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2692,12 +2692,12 @@ static struct nes_cm_core *nes_cm_alloc_core(void) nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core); nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); - cm_core->event_wq = create_singlethread_workqueue("nesewq"); + cm_core->event_wq = alloc_ordered_workqueue("nesewq", 0); if (!cm_core->event_wq) goto out_free_cmcore; cm_core->post_event = nes_cm_post_event; nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); - cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); + cm_core->disconn_wq = alloc_ordered_workqueue("nesdwq", 0); if (!cm_core->disconn_wq) goto out_free_wq; From 855cda68285ed02491f73eb86894ed38804fdfbc Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:44:26 +0530 Subject: [PATCH 56/75] IB/ipoib: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() replaces deprecated create_singlethread_workqueue(). The workqueue "ipoib_workqueue" that is used for all flush operations for the device. WQ_MEM_RECLAIM has been set since the flush operations may need to complete in order for other network functions to continue, and the memory reclaim operation might need the network functioning in order to make progress. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 74bcaa0642261e..e95c02ee05c0e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2196,7 +2196,8 @@ static int __init ipoib_init_module(void) * its private workqueue, and we only queue up flush events * on our global flush workqueue. This avoids the deadlocks. */ - ipoib_workqueue = create_singlethread_workqueue("ipoib_flush"); + ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", + WQ_MEM_RECLAIM); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; From b4541f6f883aec0311c56bd64d0963ac7a9ae53b Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:44:47 +0530 Subject: [PATCH 57/75] IB/ipoib_verbs: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "wq" queues mulitple work items viz &priv->restart_task, &priv->cm.rx_reap_task, &priv->cm.skb_task, &priv->neigh_reap_task, &priv->ah_reap_task, &priv->mcast_task and &priv->carrier_on_task. The work items require strict execution ordering. Hence, an ordered dedicated workqueue has been used. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 6067f075772a40..189dcd1709d22e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -157,7 +157,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) * the various IPoIB tasks assume they will never race against * themselves, so always use a single thread workqueue */ - priv->wq = create_singlethread_workqueue("ipoib_wq"); + priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM); if (!priv->wq) { printk(KERN_WARNING "ipoib: failed to allocate device WQ\n"); goto out_free_pd; From fb6375d7e6e743fc88a416cbfab89f608ed9ebcb Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:45:06 +0530 Subject: [PATCH 58/75] IB/mlx4/alias_GUID: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "wq" queues work item that maps to alias_guid_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index c74ef2620b859b..5e993904585254 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -881,7 +881,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); dev->sriov.alias_guid.ports_guid[i].wq = - create_singlethread_workqueue(alias_wq_name); + alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM); if (!dev->sriov.alias_guid.ports_guid[i].wq) { ret = -ENOMEM; goto err_thread; From 32f7451d1c6214169848cfea331e2c9d4067a650 Mon Sep 17 00:00:00 2001 From: "kernel@kyup.com" Date: Mon, 8 Aug 2016 17:14:22 +0300 Subject: [PATCH 59/75] ipoib: Make ipoib_warn ratelimited In certain cases it's possible to be flooded by warning messages. To cope with such situations make the ipoib_warn macro be ratelimited. To prevent accidental limiting of legitimate, bursty messages make the limit fairly liberal by allowing up to 100 messages in 10 seconds. Signed-off-by: Nikolay Borisov Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4f7d9b48df643c..7899167536e381 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -771,7 +771,13 @@ static inline void ipoib_unregister_debugfs(void) { } #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) \ - ipoib_printk(KERN_WARNING, priv, format , ## arg) +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + 10 * HZ /*10 seconds */, \ + 100); \ + if (__ratelimit(&_rs)) \ + ipoib_printk(KERN_WARNING, priv, format , ## arg);\ +} while (0) extern int ipoib_sendq_size; extern int ipoib_recvq_size; From bd99fdea420b00925e9b83a50f2ccc5e1f07ef7d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Thu, 25 Aug 2016 10:57:07 -0700 Subject: [PATCH 60/75] IB/{core,hw}: Add constant for node_desc Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + drivers/infiniband/hw/cxgb4/provider.c | 1 + drivers/infiniband/hw/hfi1/verbs.c | 3 ++- drivers/infiniband/hw/mlx4/mad.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 6 +++--- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 6 +++--- drivers/infiniband/hw/mthca/mthca_mad.c | 3 ++- drivers/infiniband/hw/mthca/mthca_provider.c | 5 +++-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + drivers/infiniband/hw/qib/qib_verbs.c | 3 ++- include/rdma/ib_verbs.h | 6 ++++-- 13 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 15defefecb4f7a..c1fb545e8d7848 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1193,7 +1193,7 @@ static ssize_t set_node_desc(struct device *device, if (!dev->modify_device) return -EIO; - memcpy(desc.node_desc, buf, min_t(int, count, 64)); + memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); if (ret) return ret; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 3edb80644b5310..b47be87d5a53a4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1396,6 +1396,7 @@ int iwch_register_device(struct iwch_dev *dev) (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index df127ce6b6ec31..645e606a17c569 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -563,6 +563,7 @@ int c4iw_register_device(struct c4iw_dev *dev) (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2b359540901db3..f803f7b5ef5db1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1423,7 +1423,8 @@ static int modify_device(struct ib_device *device, } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9b4b21bd0222d6..d8886d051d4bc3 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -345,7 +345,8 @@ static void node_desc_override(struct ib_device *dev, mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f07cbf87db5216..3ae64cefc39ef8 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -886,7 +886,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* @@ -897,7 +897,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, if (IS_ERR(mailbox)) return 0; - memcpy(mailbox->buf, props->node_desc, 64); + memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); @@ -2025,7 +2025,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 364aab9f3c9e23..39e58489dcc2c2 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -394,7 +394,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) if (err) goto out; - memcpy(node_desc, out_mad->data, 64); + memcpy(node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); out: kfree(in_mad); kfree(out_mad); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 24f5f48e9ddf36..29878aa716ee3a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -472,7 +472,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, } struct mlx5_reg_node_desc { - u8 desc[64]; + u8 desc[IB_DEVICE_NODE_DESC_MAX]; }; static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) @@ -920,13 +920,13 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ - memcpy(&in, props->node_desc, 64); + memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX); err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, sizeof(out), MLX5_REG_NODE_DESC, 0, 1); if (err) return err; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7c3f2fb44ba51d..9139405c481000 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -153,7 +153,8 @@ static void node_desc_override(struct ib_device *dev, mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { mutex_lock(&to_mdev(dev)->cap_mask_mutex); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); mutex_unlock(&to_mdev(dev)->cap_mask_mutex); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index da2335f7f7c357..b697a0f77231fd 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -193,7 +193,8 @@ static int mthca_modify_device(struct ib_device *ibdev, if (mask & IB_DEVICE_MODIFY_NODE_DESC) { if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, + IB_DEVICE_NODE_DESC_MAX); mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); } @@ -1138,7 +1139,7 @@ static int mthca_init_node_data(struct mthca_dev *dev) if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 07d0c6c5b04657..15e35acc690dc9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,6 +119,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); + BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); dev->ibdev.owner = THIS_MODULE; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fd1dfbce553974..2d7e52619b554e 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1370,7 +1370,8 @@ static int qib_modify_device(struct ib_device *device, } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct qib_ibport *ibp = &dd->pport[i].ibport_data; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0cec4da51eb7ea..d3fba0a56e1707 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -537,9 +537,11 @@ enum ib_device_modify_flags { IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; +#define IB_DEVICE_NODE_DESC_MAX 64 + struct ib_device_modify { u64 sys_image_guid; - char node_desc[64]; + char node_desc[IB_DEVICE_NODE_DESC_MAX]; }; enum ib_port_modify_flags { @@ -2077,7 +2079,7 @@ struct ib_device { u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; - char node_desc[64]; + char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; From 3085e29e2f832cbf77ddeeffe715809a31254b5f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:11 +0300 Subject: [PATCH 61/75] IB/mlx5: Move and decouple user vendor structures This patch decouples and moves vendors specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libmlx5) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + drivers/infiniband/hw/mlx5/cq.c | 1 - drivers/infiniband/hw/mlx5/main.c | 1 - drivers/infiniband/hw/mlx5/mlx5_ib.h | 37 +++++++++++++++ drivers/infiniband/hw/mlx5/mr.c | 1 - drivers/infiniband/hw/mlx5/qp.c | 1 - drivers/infiniband/hw/mlx5/srq.c | 1 - include/uapi/rdma/Kbuild | 1 + .../user.h => include/uapi/rdma/mlx5-abi.h | 45 ++----------------- 9 files changed, 42 insertions(+), 47 deletions(-) rename drivers/infiniband/hw/mlx5/user.h => include/uapi/rdma/mlx5-abi.h (84%) diff --git a/MAINTAINERS b/MAINTAINERS index 87e23cda8dc1a6..e76efbc414cd8c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7819,6 +7819,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ S: Supported F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ +F: include/uapi/rdma/mlx5-abi.h MELLANOX MLX5 IB driver M: Matan Barak diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 35a9f718e66948..1188fef08450dd 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,7 +35,6 @@ #include #include #include "mlx5_ib.h" -#include "user.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 29878aa716ee3a..f4160d56dc4f3b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -54,7 +54,6 @@ #include #include #include -#include "user.h" #include "mlx5_ib.h" #define DRIVER_NAME "mlx5_ib" diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 40fe1a65402b13..1df8a67d4f0232 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -44,6 +44,7 @@ #include #include #include +#include #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -955,4 +956,40 @@ static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx, return 0; } + +static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_qp *ucmd, + int inlen, + u32 *user_index) +{ + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); +} + +static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_srq *ucmd, + int inlen, + u32 *user_index) +{ + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 873d81dbde41fe..d4ad672b905bf0 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -40,7 +40,6 @@ #include #include #include "mlx5_ib.h" -#include "user.h" enum { MAX_PENDING_REG_MR = 8, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2ec88c649ac035..9d97a71a133530 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -35,7 +35,6 @@ #include #include #include "mlx5_ib.h" -#include "user.h" /* not supported currently */ static int wq_signature; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index ed6ac52355f1b9..3857dbd9c95604 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -38,7 +38,6 @@ #include #include "mlx5_ib.h" -#include "user.h" /* not supported currently */ static int srq_signature; diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 4edb0f2b4f9f73..e9b0ca24f29dcc 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -7,3 +7,4 @@ header-y += rdma_netlink.h header-y += rdma_user_cm.h header-y += hfi/ header-y += rdma_user_rxe.h +header-y += mlx5-abi.h diff --git a/drivers/infiniband/hw/mlx5/user.h b/include/uapi/rdma/mlx5-abi.h similarity index 84% rename from drivers/infiniband/hw/mlx5/user.h rename to include/uapi/rdma/mlx5-abi.h index 0e49d5b30a4c5a..f5d0f4e83b59f4 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -30,13 +30,11 @@ * SOFTWARE. */ -#ifndef MLX5_IB_USER_H -#define MLX5_IB_USER_H +#ifndef MLX5_ABI_USER_H +#define MLX5_ABI_USER_H #include -#include "mlx5_ib.h" - enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, @@ -50,7 +48,6 @@ enum { MLX5_WQ_FLAG_SIGNATURE = 1 << 0, }; - /* Increment this value if any changes that break userspace ABI * compatibility are made. */ @@ -249,40 +246,4 @@ struct mlx5_ib_modify_wq { __u32 comp_mask; __u32 reserved; }; - -static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, - struct mlx5_ib_create_qp *ucmd, - int inlen, - u32 *user_index) -{ - u8 cqe_version = ucontext->cqe_version; - - if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && - !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) - return 0; - - if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != - !!cqe_version)) - return -EINVAL; - - return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); -} - -static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, - struct mlx5_ib_create_srq *ucmd, - int inlen, - u32 *user_index) -{ - u8 cqe_version = ucontext->cqe_version; - - if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && - !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) - return 0; - - if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != - !!cqe_version)) - return -EINVAL; - - return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); -} -#endif /* MLX5_IB_USER_H */ +#endif /* MLX5_ABI_USER_H */ From a85fb3383340b417132e5731f9694840660887cb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:12 +0300 Subject: [PATCH 62/75] IB/cxgb3: Move user vendor structures This patch moves cxgb3 vendor's specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libcxgb3) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + drivers/infiniband/hw/cxgb3/iwch.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- include/uapi/rdma/Kbuild | 1 + .../hw/cxgb3/iwch_user.h => include/uapi/rdma/cxgb3-abi.h | 8 +++++--- 5 files changed, 9 insertions(+), 5 deletions(-) rename drivers/infiniband/hw/cxgb3/iwch_user.h => include/uapi/rdma/cxgb3-abi.h (95%) diff --git a/MAINTAINERS b/MAINTAINERS index e76efbc414cd8c..8ebd644becef0f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3460,6 +3460,7 @@ L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org S: Supported F: drivers/infiniband/hw/cxgb3/ +F: include/uapi/rdma/cxgb3-abi.h CXGB4 ETHERNET DRIVER (CXGB4) M: Hariprasad S diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 8e77dc543dd165..b3e11329801d7d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -36,7 +36,7 @@ #include "cxgb3_offload.h" #include "iwch_provider.h" -#include "iwch_user.h" +#include #include "iwch.h" #include "iwch_cm.h" diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b47be87d5a53a4..cba57bb53dba31 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -58,7 +58,7 @@ #include "iwch.h" #include "iwch_provider.h" #include "iwch_cm.h" -#include "iwch_user.h" +#include #include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index e9b0ca24f29dcc..681d7189a01a7e 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -7,4 +7,5 @@ header-y += rdma_netlink.h header-y += rdma_user_cm.h header-y += hfi/ header-y += rdma_user_rxe.h +header-y += cxgb3-abi.h header-y += mlx5-abi.h diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/include/uapi/rdma/cxgb3-abi.h similarity index 95% rename from drivers/infiniband/hw/cxgb3/iwch_user.h rename to include/uapi/rdma/cxgb3-abi.h index a277c31fcaf7be..48a19bda071b8d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/include/uapi/rdma/cxgb3-abi.h @@ -29,8 +29,10 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __IWCH_USER_H__ -#define __IWCH_USER_H__ +#ifndef CXGB3_ABI_USER_H +#define CXBG3_ABI_USER_H + +#include #define IWCH_UVERBS_ABI_VERSION 1 @@ -71,4 +73,4 @@ struct iwch_create_qp_resp { struct iwch_reg_user_mr_resp { __u32 pbl_addr; }; -#endif +#endif /* CXGB3_ABI_USER_H */ From e44ee2fd9845a86b0c7e8742672ae5ba6d7d34ee Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:13 +0300 Subject: [PATCH 63/75] IB/cxgb4: Move user vendor structures This patch moves cxgb4 vendor's specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libcxgb4) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- include/uapi/rdma/Kbuild | 1 + .../hw/cxgb4/user.h => include/uapi/rdma/cxgb4-abi.h | 11 ++++++----- 4 files changed, 9 insertions(+), 6 deletions(-) rename drivers/infiniband/hw/cxgb4/user.h => include/uapi/rdma/cxgb4-abi.h (94%) diff --git a/MAINTAINERS b/MAINTAINERS index 8ebd644becef0f..22fd3b7f5a319b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3482,6 +3482,7 @@ L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org S: Supported F: drivers/infiniband/hw/cxgb4/ +F: include/uapi/rdma/cxgb4-abi.h CXGB4VF ETHERNET DRIVER (CXGB4VF) M: Casey Leedom diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index aa47e0ae80bc4c..f83604b2f82da4 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -58,7 +58,7 @@ #include "cxgb4.h" #include "cxgb4_uld.h" #include "l2t.h" -#include "user.h" +#include #define DRV_NAME "iw_cxgb4" #define MOD DRV_NAME ":" diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 681d7189a01a7e..59b2c9b34423b9 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -8,4 +8,5 @@ header-y += rdma_user_cm.h header-y += hfi/ header-y += rdma_user_rxe.h header-y += cxgb3-abi.h +header-y += cxgb4-abi.h header-y += mlx5-abi.h diff --git a/drivers/infiniband/hw/cxgb4/user.h b/include/uapi/rdma/cxgb4-abi.h similarity index 94% rename from drivers/infiniband/hw/cxgb4/user.h rename to include/uapi/rdma/cxgb4-abi.h index 295f422b9a3ab4..472b1599089405 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -29,8 +29,10 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __C4IW_USER_H__ -#define __C4IW_USER_H__ +#ifndef CXGB4_ABI_USER_H +#define CXGB4_ABI_USER_H + +#include #define C4IW_UVERBS_ABI_VERSION 3 @@ -51,9 +53,8 @@ struct c4iw_create_cq_resp { __u32 reserved; /* explicit padding (optional for i386) */ }; - enum { - C4IW_QPF_ONCHIP = (1<<0) + C4IW_QPF_ONCHIP = (1 << 0) }; struct c4iw_create_qp_resp { @@ -77,4 +78,4 @@ struct c4iw_alloc_ucontext_resp { __u32 status_page_size; __u32 reserved; /* explicit padding (optional for i386) */ }; -#endif +#endif /* CXGB4_ABI_USER_H */ From 9ce28a20eec551cfc032c87cd31a05c14d11155d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:14 +0300 Subject: [PATCH 64/75] IB/mlx4: Move user vendor structures This patch moves mlx4 vendor's specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libmlx4) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/infiniband/hw/mlx4/srq.c | 2 +- include/uapi/rdma/Kbuild | 1 + .../hw/mlx4/user.h => include/uapi/rdma/mlx4-abi.h | 6 +++--- 7 files changed, 9 insertions(+), 7 deletions(-) rename drivers/infiniband/hw/mlx4/user.h => include/uapi/rdma/mlx4-abi.h (97%) diff --git a/MAINTAINERS b/MAINTAINERS index 22fd3b7f5a319b..0af558f501cf2d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7801,6 +7801,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ S: Supported F: drivers/net/ethernet/mellanox/mlx4/ F: include/linux/mlx4/ +F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX4 IB driver M: Yishai Hadas diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index d6fc8a6e8c3324..2f0b4eed7eaeae 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -37,7 +37,7 @@ #include #include "mlx4_ib.h" -#include "user.h" +#include static void mlx4_ib_cq_comp(struct mlx4_cq *cq) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3ae64cefc39ef8..3c6d6103d18bf6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -55,7 +55,7 @@ #include #include "mlx4_ib.h" -#include "user.h" +#include #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "2.2-1" diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 768085f5956645..44c8a0da1507e5 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -47,7 +47,7 @@ #include #include "mlx4_ib.h" -#include "user.h" +#include static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 0597f3eef5d03c..7dd3f267f06b23 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -37,7 +37,7 @@ #include #include "mlx4_ib.h" -#include "user.h" +#include static void *get_wqe(struct mlx4_ib_srq *srq, int n) { diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 59b2c9b34423b9..a1090ebc0b3453 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -9,4 +9,5 @@ header-y += hfi/ header-y += rdma_user_rxe.h header-y += cxgb3-abi.h header-y += cxgb4-abi.h +header-y += mlx4-abi.h header-y += mlx5-abi.h diff --git a/drivers/infiniband/hw/mlx4/user.h b/include/uapi/rdma/mlx4-abi.h similarity index 97% rename from drivers/infiniband/hw/mlx4/user.h rename to include/uapi/rdma/mlx4-abi.h index 07e6769ef43bc8..af431752655c57 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -31,8 +31,8 @@ * SOFTWARE. */ -#ifndef MLX4_IB_USER_H -#define MLX4_IB_USER_H +#ifndef MLX4_ABI_USER_H +#define MLX4_ABI_USER_H #include @@ -104,4 +104,4 @@ struct mlx4_ib_create_qp { __u8 reserved[5]; }; -#endif /* MLX4_IB_USER_H */ +#endif /* MLX4_ABI_USER_H */ From a7fe7380f6b234f207a599c9ffbaae6c1a574634 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:15 +0300 Subject: [PATCH 65/75] IB/ocrdma: Move user vendor structures This patch moves ocrdma vendor's specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libmlx4) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. In addition, it changes types to be __uXX instead of uXX. Signed-off-by: Leon Romanovsky Acked-By: Devesh Sharma Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- include/uapi/rdma/Kbuild | 1 + .../uapi/rdma/ocrdma-abi.h | 138 +++++++++--------- 5 files changed, 74 insertions(+), 70 deletions(-) rename drivers/infiniband/hw/ocrdma/ocrdma_abi.h => include/uapi/rdma/ocrdma-abi.h (64%) diff --git a/MAINTAINERS b/MAINTAINERS index 0af558f501cf2d..940c0ff03f3fa4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10601,6 +10601,7 @@ L: linux-rdma@vger.kernel.org W: http://www.emulex.com S: Supported F: drivers/infiniband/hw/ocrdma/ +F: include/uapi/rdma/ocrdma-abi.h SFC NETWORK DRIVER M: Solarflare linux maintainers diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 15e35acc690dc9..896071502739a8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -56,7 +56,7 @@ #include "be_roce.h" #include "ocrdma_hw.h" #include "ocrdma_stats.h" -#include "ocrdma_abi.h" +#include MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b1a3d91fe8b94f..71d0534960d613 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -51,7 +51,7 @@ #include "ocrdma.h" #include "ocrdma_hw.h" #include "ocrdma_verbs.h" -#include "ocrdma_abi.h" +#include int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index a1090ebc0b3453..492e144ebe90f1 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -11,3 +11,4 @@ header-y += cxgb3-abi.h header-y += cxgb4-abi.h header-y += mlx4-abi.h header-y += mlx5-abi.h +header-y += ocrdma-abi.h diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/include/uapi/rdma/ocrdma-abi.h similarity index 64% rename from drivers/infiniband/hw/ocrdma/ocrdma_abi.h rename to include/uapi/rdma/ocrdma-abi.h index 430b1350fe96ec..9f28191bef4d7e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/include/uapi/rdma/ocrdma-abi.h @@ -40,110 +40,112 @@ * Costa Mesa, CA 92626 */ -#ifndef __OCRDMA_ABI_H__ -#define __OCRDMA_ABI_H__ +#ifndef OCRDMA_ABI_USER_H +#define OCRDMA_ABI_USER_H + +#include #define OCRDMA_ABI_VERSION 2 #define OCRDMA_BE_ROCE_ABI_VERSION 1 /* user kernel communication data structures. */ struct ocrdma_alloc_ucontext_resp { - u32 dev_id; - u32 wqe_size; - u32 max_inline_data; - u32 dpp_wqe_size; - u64 ah_tbl_page; - u32 ah_tbl_len; - u32 rqe_size; - u8 fw_ver[32]; + __u32 dev_id; + __u32 wqe_size; + __u32 max_inline_data; + __u32 dpp_wqe_size; + __u64 ah_tbl_page; + __u32 ah_tbl_len; + __u32 rqe_size; + __u8 fw_ver[32]; /* for future use/new features in progress */ - u64 rsvd1; - u64 rsvd2; + __u64 rsvd1; + __u64 rsvd2; }; struct ocrdma_alloc_pd_ureq { - u64 rsvd1; + __u64 rsvd1; }; struct ocrdma_alloc_pd_uresp { - u32 id; - u32 dpp_enabled; - u32 dpp_page_addr_hi; - u32 dpp_page_addr_lo; - u64 rsvd1; + __u32 id; + __u32 dpp_enabled; + __u32 dpp_page_addr_hi; + __u32 dpp_page_addr_lo; + __u64 rsvd1; }; struct ocrdma_create_cq_ureq { - u32 dpp_cq; - u32 rsvd; /* pad */ + __u32 dpp_cq; + __u32 rsvd; /* pad */ }; #define MAX_CQ_PAGES 8 struct ocrdma_create_cq_uresp { - u32 cq_id; - u32 page_size; - u32 num_pages; - u32 max_hw_cqe; - u64 page_addr[MAX_CQ_PAGES]; - u64 db_page_addr; - u32 db_page_size; - u32 phase_change; + __u32 cq_id; + __u32 page_size; + __u32 num_pages; + __u32 max_hw_cqe; + __u64 page_addr[MAX_CQ_PAGES]; + __u64 db_page_addr; + __u32 db_page_size; + __u32 phase_change; /* for future use/new features in progress */ - u64 rsvd1; - u64 rsvd2; + __u64 rsvd1; + __u64 rsvd2; }; #define MAX_QP_PAGES 8 #define MAX_UD_AV_PAGES 8 struct ocrdma_create_qp_ureq { - u8 enable_dpp_cq; - u8 rsvd; - u16 dpp_cq_id; - u32 rsvd1; /* pad */ + __u8 enable_dpp_cq; + __u8 rsvd; + __u16 dpp_cq_id; + __u32 rsvd1; /* pad */ }; struct ocrdma_create_qp_uresp { - u16 qp_id; - u16 sq_dbid; - u16 rq_dbid; - u16 resv0; /* pad */ - u32 sq_page_size; - u32 rq_page_size; - u32 num_sq_pages; - u32 num_rq_pages; - u64 sq_page_addr[MAX_QP_PAGES]; - u64 rq_page_addr[MAX_QP_PAGES]; - u64 db_page_addr; - u32 db_page_size; - u32 dpp_credit; - u32 dpp_offset; - u32 num_wqe_allocated; - u32 num_rqe_allocated; - u32 db_sq_offset; - u32 db_rq_offset; - u32 db_shift; - u64 rsvd[11]; + __u16 qp_id; + __u16 sq_dbid; + __u16 rq_dbid; + __u16 resv0; /* pad */ + __u32 sq_page_size; + __u32 rq_page_size; + __u32 num_sq_pages; + __u32 num_rq_pages; + __u64 sq_page_addr[MAX_QP_PAGES]; + __u64 rq_page_addr[MAX_QP_PAGES]; + __u64 db_page_addr; + __u32 db_page_size; + __u32 dpp_credit; + __u32 dpp_offset; + __u32 num_wqe_allocated; + __u32 num_rqe_allocated; + __u32 db_sq_offset; + __u32 db_rq_offset; + __u32 db_shift; + __u64 rsvd[11]; } __packed; struct ocrdma_create_srq_uresp { - u16 rq_dbid; - u16 resv0; /* pad */ - u32 resv1; + __u16 rq_dbid; + __u16 resv0; /* pad */ + __u32 resv1; - u32 rq_page_size; - u32 num_rq_pages; + __u32 rq_page_size; + __u32 num_rq_pages; - u64 rq_page_addr[MAX_QP_PAGES]; - u64 db_page_addr; + __u64 rq_page_addr[MAX_QP_PAGES]; + __u64 db_page_addr; - u32 db_page_size; - u32 num_rqe_allocated; - u32 db_rq_offset; - u32 db_shift; + __u32 db_page_size; + __u32 num_rqe_allocated; + __u32 db_rq_offset; + __u32 db_shift; - u64 rsvd2; - u64 rsvd3; + __u64 rsvd2; + __u64 rsvd3; }; -#endif /* __OCRDMA_ABI_H__ */ +#endif /* OCRDMA_ABI_USER_H */ From c546b2a3b6612a9fa09940a13844ef384683db6e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:16 +0300 Subject: [PATCH 66/75] IB/nes: Move user vendor structures This patch moves nes vendor's specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libmlx4) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + drivers/infiniband/hw/nes/nes.h | 2 +- include/uapi/rdma/Kbuild | 1 + .../hw/nes/nes_user.h => include/uapi/rdma/nes-abi.h | 6 +++--- 4 files changed, 6 insertions(+), 4 deletions(-) rename drivers/infiniband/hw/nes/nes_user.h => include/uapi/rdma/nes-abi.h (97%) diff --git a/MAINTAINERS b/MAINTAINERS index 940c0ff03f3fa4..cc77b9ab9d526f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8086,6 +8086,7 @@ L: linux-rdma@vger.kernel.org W: http://www.intel.com/Products/Server/Adapters/Server-Cluster/Server-Cluster-overview.htm S: Supported F: drivers/infiniband/hw/nes/ +F: include/uapi/rdma/nes-abi.h NETEM NETWORK EMULATOR M: Stephen Hemminger diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index bd9d132f11c7c4..e7430c9254d381 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -165,7 +165,7 @@ do { \ #include "nes_hw.h" #include "nes_verbs.h" #include "nes_context.h" -#include "nes_user.h" +#include #include "nes_cm.h" #include "nes_mgt.h" diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 492e144ebe90f1..ec32a8baf600e1 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -11,4 +11,5 @@ header-y += cxgb3-abi.h header-y += cxgb4-abi.h header-y += mlx4-abi.h header-y += mlx5-abi.h +header-y += nes-abi.h header-y += ocrdma-abi.h diff --git a/drivers/infiniband/hw/nes/nes_user.h b/include/uapi/rdma/nes-abi.h similarity index 97% rename from drivers/infiniband/hw/nes/nes_user.h rename to include/uapi/rdma/nes-abi.h index 529c421bb15caf..6eb3734394a2a5 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/include/uapi/rdma/nes-abi.h @@ -34,8 +34,8 @@ * */ -#ifndef NES_USER_H -#define NES_USER_H +#ifndef NES_ABI_USER_H +#define NES_ABI_USER_H #include @@ -111,4 +111,4 @@ struct nes_create_qp_resp { __u32 nes_drv_opt; }; -#endif /* NES_USER_H */ +#endif /* NES_ABI_USER_H */ From 486f60954c71c5ce42341ed02cf804dda1c1bcc5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Sep 2016 17:31:17 +0300 Subject: [PATCH 67/75] IB/mthca: Move user vendor structures This patch moves mthca vendor's specific structures to common UAPI folder which will be visible to all consumers. These structures are used by user-space library driver (libmthca) and currently manually copied to that library. This move will allow cross-compile against these files and simplify introduction of vendor specific data. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- include/uapi/rdma/Kbuild | 1 + include/uapi/rdma/mthca-abi.h | 111 +++++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 include/uapi/rdma/mthca-abi.h diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index b697a0f77231fd..358930a41e36c3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -46,7 +46,7 @@ #include "mthca_dev.h" #include "mthca_cmd.h" -#include "mthca_user.h" +#include #include "mthca_memfree.h" static void init_query_mad(struct ib_smp *mad) diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index ec32a8baf600e1..f14ab7ff5fee75 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -11,5 +11,6 @@ header-y += cxgb3-abi.h header-y += cxgb4-abi.h header-y += mlx4-abi.h header-y += mlx5-abi.h +header-y += mthca-abi.h header-y += nes-abi.h header-y += ocrdma-abi.h diff --git a/include/uapi/rdma/mthca-abi.h b/include/uapi/rdma/mthca-abi.h new file mode 100644 index 00000000000000..bcbf4ff2f6d1ae --- /dev/null +++ b/include/uapi/rdma/mthca-abi.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_ABI_USER_H +#define MTHCA_ABI_USER_H + +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MTHCA_UVERBS_ABI_VERSION 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ +struct mthca_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct mthca_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +/* + * Mark the memory region with a DMA attribute that causes + * in-flight DMA to be flushed when the region is written to: + */ +#define MTHCA_MR_DMASYNC 0x1 + +struct mthca_reg_mr { + __u32 mr_attrs; + __u32 reserved; +}; + +struct mthca_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct mthca_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mthca_resize_cq { + __u32 lkey; + __u32 reserved; +}; + +struct mthca_create_srq { + __u32 lkey; + __u32 db_index; + __u64 db_page; +}; + +struct mthca_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mthca_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; +#endif /* MTHCA_ABI_USER_H */ From fd10ed8e6f4246ac5e18b921ba50562959502117 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:21 +0300 Subject: [PATCH 68/75] IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets In MLX qp packets, the LRH (built by the driver) has both a VL field and an SL field. When building a QP1 packet, the VL field should reflect the SLtoVL mapping and not arbitrarily contain zero (as is done now). This bug causes credit problems in IB switches at high rates of QP1 packets. The fix is to cache the SL to VL mapping in the driver, and look up the VL mapped to the SL provided in the send request when sending QP1 packets. For FW versions which support generating a port_management_config_change event with subtype sl-to-vl-table-change, the driver uses that event to update its sl-to-vl mapping cache. Otherwise, the driver snoops incoming SMP mads to update the cache. There remains the case where the FW is running in secure-host mode (so no QP0 packets are delivered to the driver), and the FW does not generate the sl2vl mapping change event. To support this case, the driver updates (via querying the FW) its sl2vl mapping cache when running in secure-host mode when it receives either a Port Up event or a client-reregister event (where the port is still up, but there may have been an opensm failover). OpenSM modifies the sl2vl mapping before Port Up and Client-reregister events occur, so if there is a mapping change the driver's cache will be properly updated. Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 64 +++++++++++++- drivers/infiniband/hw/mlx4/main.c | 110 +++++++++++++++++++++++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 7 ++ drivers/infiniband/hw/mlx4/qp.c | 23 ++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 13 ++- include/linux/mlx4/device.h | 13 ++- 6 files changed, 220 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index d8886d051d4bc3..1301a1db958c17 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -230,6 +230,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad mad->mad_hdr.method == IB_MGMT_METHOD_SET) switch (mad->mad_hdr.attr_id) { case IB_SMP_ATTR_PORT_INFO: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; lid = be16_to_cpu(pinfo->lid); @@ -245,6 +247,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad break; case IB_SMP_ATTR_PKEY_TABLE: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; if (!mlx4_is_mfunc(dev->dev)) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_PKEY_CHANGE); @@ -281,6 +285,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad break; case IB_SMP_ATTR_GUID_INFO: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port_num, @@ -296,6 +302,26 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad } break; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + /* cache sl to vl mapping changes for use in + * filling QP1 LRH VL field when sending packets + */ + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV && + dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT) + return; + if (!mlx4_is_slave(dev->dev)) { + union sl2vl_tbl_to_u64 sl2vl64; + int jj; + + for (jj = 0; jj < 8; jj++) { + sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj]; + pr_debug("port %u, sl2vl[%d] = %02x\n", + port_num, jj, sl2vl64.sl8[jj]); + } + atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64); + } + break; + default: break; } @@ -806,8 +832,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) - smp_snoop(ibdev, port_num, in_mad, prev_lid); + smp_snoop(ibdev, port_num, in_mad, prev_lid); /* slaves get node desc from FW */ if (!mlx4_is_slave(to_mdev(ibdev)->dev)) node_desc_override(ibdev, out_mad); @@ -1038,6 +1063,23 @@ static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); } } + + /* Update the sl to vl table from inside client rereg + * only if in secure-host mode (snooping is not possible) + * and the sl-to-vl change event is not generated by FW. + */ + if (!mlx4_is_slave(dev->dev) && + dev->dev->flags & MLX4_FLAG_SECURE_HOST && + !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) { + if (mlx4_is_master(dev->dev)) + /* already in work queue from mlx4_ib_event queueing + * mlx4_handle_port_mgmt_change_event, which calls + * this procedure. Therefore, call sl2vl_update directly. + */ + mlx4_ib_sl2vl_update(dev, port_num); + else + mlx4_sched_ib_sl2vl_update_work(dev, port_num); + } mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); } @@ -1156,6 +1198,24 @@ void handle_port_mgmt_change_event(struct work_struct *work) handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); } break; + + case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP: + /* cache sl to vl mapping changes for use in + * filling QP1 LRH VL field when sending packets + */ + if (!mlx4_is_slave(dev->dev)) { + union sl2vl_tbl_to_u64 sl2vl64; + int jj; + + for (jj = 0; jj < 8; jj++) { + sl2vl64.sl8[jj] = + eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj]; + pr_debug("port %u, sl2vl[%d] = %02x\n", + port, jj, sl2vl64.sl8[jj]); + } + atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64); + } + break; default: pr_warn("Unsupported subtype 0x%x for " "Port Management Change event\n", eqe->subtype); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3c6d6103d18bf6..1811eb5b6aabfc 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -832,6 +832,66 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return ret; } +static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl) +{ + union sl2vl_tbl_to_u64 sl2vl64; + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; + int err = -ENOMEM; + int jj; + + if (mlx4_is_slave(to_mdev(ibdev)->dev)) { + *sl2vl_tbl = 0; + return 0; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE; + in_mad->attr_mod = 0; + + if (mlx4_is_mfunc(to_mdev(ibdev)->dev)) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + in_mad, out_mad); + if (err) + goto out; + + for (jj = 0; jj < 8; jj++) + sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj]; + *sl2vl_tbl = sl2vl64.sl64; + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev) +{ + u64 sl2vl; + int i; + int err; + + for (i = 1; i <= mdev->dev->caps.num_ports; i++) { + if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + continue; + err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl); + if (err) { + pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n", + i, err); + sl2vl = 0; + } + atomic64_set(&mdev->sl2vl[i - 1], sl2vl); + } +} + int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view) { @@ -2675,6 +2735,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; + mlx4_init_sl2vl_tbl(ibdev); for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); @@ -3123,6 +3184,47 @@ static void handle_bonded_port_state_event(struct work_struct *work) ib_dispatch_event(&ibev); } +void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port) +{ + u64 sl2vl; + int err; + + err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl); + if (err) { + pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n", + port, err); + sl2vl = 0; + } + atomic64_set(&mdev->sl2vl[port - 1], sl2vl); +} + +static void ib_sl2vl_update_work(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *mdev = ew->ib_dev; + int port = ew->port; + + mlx4_ib_sl2vl_update(mdev, port); + + kfree(ew); +} + +void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, + int port) +{ + struct ib_event_work *ew; + + ew = kmalloc(sizeof(*ew), GFP_ATOMIC); + if (ew) { + INIT_WORK(&ew->work, ib_sl2vl_update_work); + ew->port = port; + ew->ib_dev = ibdev; + queue_work(wq, &ew->work); + } else { + pr_err("failed to allocate memory for sl2vl update work\n"); + } +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -3153,10 +3255,14 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; - if (mlx4_is_master(dev) && + if (!mlx4_is_slave(dev) && rdma_port_get_link_layer(&ibdev->ib_dev, p) == IB_LINK_LAYER_INFINIBAND) { - mlx4_ib_invalidate_all_guid_record(ibdev, p); + if (mlx4_is_master(dev)) + mlx4_ib_invalidate_all_guid_record(ibdev, p); + if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST && + !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) + mlx4_sched_ib_sl2vl_update_work(ibdev, p); } ibev.event = IB_EVENT_PORT_ACTIVE; break; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7c5832ede4bd0c..8db7cb1a3716a0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -570,6 +570,7 @@ struct mlx4_ib_dev { struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; + atomic64_t sl2vl[MLX4_MAX_PORTS]; struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; @@ -600,6 +601,7 @@ struct ib_event_work { struct work_struct work; struct mlx4_ib_dev *ib_dev; struct mlx4_eqe ib_eqe; + int port; }; struct mlx4_ib_qp_tunnel_init_attr { @@ -883,4 +885,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, u8 port_num, int index); +void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, + int port); + +void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 44c8a0da1507e5..16f654dc8a4659 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2405,6 +2405,22 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, return 0; } +static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num) +{ + union sl2vl_tbl_to_u64 tmp_vltab; + u8 vl; + + if (sl > 15) + return 0xf; + tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]); + vl = tmp_vltab.sl8[sl >> 1]; + if (sl & 1) + vl &= 0x0f; + else + vl >>= 4; + return vl; +} + #define MLX4_ROCEV2_QP1_SPORT 0xC000 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) @@ -2587,7 +2603,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : + sl_to_vl(to_mdev(ib_dev), + sqp->ud_header.lrh.service_level, + sqp->qp.port); + if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) + return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d728704d0c7b52..d87bbe65c2b0ed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", [33] = "RoCEv2 support", - [34] = "DMFS Sniffer support (UC & MC)" + [34] = "DMFS Sniffer support (UC & MC)", + [36] = "sl to vl mapping table change event support" }; int i; @@ -703,6 +704,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 +#define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET 0x78 #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a #define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 @@ -822,6 +824,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET); + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT; MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN; @@ -2698,7 +2703,6 @@ static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, int mlx4_config_mad_demux(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; - int secure_host_active; int err; /* Check if mad_demux is supported */ @@ -2721,7 +2725,8 @@ int mlx4_config_mad_demux(struct mlx4_dev *dev) goto out; } - secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox); + if (mlx4_check_smp_firewall_active(dev, mailbox)) + dev->flags |= MLX4_FLAG_SECURE_HOST; /* Config mad_demux to handle all MADs returned by the query above */ err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, @@ -2732,7 +2737,7 @@ int mlx4_config_mad_demux(struct mlx4_dev *dev) goto out; } - if (secure_host_active) + if (dev->flags & MLX4_FLAG_SECURE_HOST) mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); out: mlx4_free_cmd_mailbox(dev, mailbox); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da3552f7cbe0..062d10aaf5cb65 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -71,7 +71,8 @@ enum { MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, - MLX4_FLAG_BONDED = 1 << 7 + MLX4_FLAG_BONDED = 1 << 7, + MLX4_FLAG_SECURE_HOST = 1 << 8, }; enum { @@ -221,6 +222,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 36, }; enum { @@ -448,6 +450,7 @@ enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, + MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP = 0x17, }; /* Port mgmt change event handling */ @@ -459,6 +462,11 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +union sl2vl_tbl_to_u64 { + u8 sl8[8]; + u64 sl64; +}; + enum { MLX4_DEVICE_STATE_UP = 1 << 0, MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, @@ -945,6 +953,9 @@ struct mlx4_eqe { __be32 block_ptr; __be32 tbl_entries_mask; } __packed tbl_change_info; + struct { + u8 sl2vl_table[8]; + } __packed sl2vl_tbl_change_info; } params; } __packed port_mgmt_change; struct { From 52746129e02998fb45942aed359e77e63def4997 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Sep 2016 09:09:42 -0700 Subject: [PATCH 69/75] IB/core: Improve ib_map_mr_sg() documentation Document that ib_map_mr_sg() is able to map physically discontiguous sg-lists as a single MR. Change IB_MR_TYPE_SG_GAPS_REG into IB_MR_TYPE_SG_GAPS. Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d4ef3b1a695a3f..1c6b2a4f50d49b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1811,13 +1811,13 @@ EXPORT_SYMBOL(ib_set_vf_guid); * * Constraints: * - The first sg element is allowed to have an offset. - * - Each sg element must be aligned to page_size (or physically - * contiguous to the previous element). In case an sg element has a - * non contiguous offset, the mapping prefix will not include it. + * - Each sg element must either be aligned to page_size or virtually + * contiguous to the previous element. In case an sg element has a + * non-contiguous offset, the mapping prefix will not include it. * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. - * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these + * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. From 52bb8c626e0e5526c72b6ad17f1381f0bad283cc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Sep 2016 12:57:10 -0700 Subject: [PATCH 70/75] IB/srp: Remove an unused argument Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5ed1e8841a7b0f..60031b9159d9fe 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1388,7 +1388,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, static int srp_map_sg_entry(struct srp_map_state *state, struct srp_rdma_ch *ch, - struct scatterlist *sg, int sg_index) + struct scatterlist *sg) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; @@ -1441,7 +1441,7 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; for_each_sg(scat, sg, count, i) { - ret = srp_map_sg_entry(state, ch, sg, i); + ret = srp_map_sg_entry(state, ch, sg); if (ret) return ret; } From 681cc3608355737c1effebc8145f95c8c3344bc3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Sep 2016 12:58:49 -0700 Subject: [PATCH 71/75] IB/srp: Fix infinite loop when FMR sg[0].offset != 0 Avoid that mapping an sg-list in which the first element has a non-zero offset triggers an infinite loop when using FMR. This patch makes the FMR mapping code similar to that of ib_sg_to_pages(). Note: older Mellanox HCAs do not support non-zero offsets for FMR. See also commit 8c4037b501ac ("IB/srp: always avoid non-zero offsets into an FMR"). Reported-by: Alex Estrin Signed-off-by: Bart Van Assche Cc: Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 60031b9159d9fe..d980fb458ad49c 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1402,7 +1402,9 @@ static int srp_map_sg_entry(struct srp_map_state *state, while (dma_len) { unsigned offset = dma_addr & ~dev->mr_page_mask; - if (state->npages == dev->max_pages_per_mr || offset != 0) { + + if (state->npages == dev->max_pages_per_mr || + (state->npages > 0 && offset != 0)) { ret = srp_map_finish_fmr(state, ch); if (ret) return ret; @@ -1419,12 +1421,12 @@ static int srp_map_sg_entry(struct srp_map_state *state, } /* - * If the last entry of the MR wasn't a full page, then we need to + * If the end of the MR is not on a page boundary then we need to * close it out and start a new one -- we can only merge at page * boundaries. */ ret = 0; - if (len != dev->mr_page_size) + if ((dma_addr & ~dev->mr_page_mask) != 0) ret = srp_map_finish_fmr(state, ch); return ret; } From b6bc1c731f0b985e91f618561fc82c6e252dfaf4 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 29 Sep 2016 07:31:33 -0700 Subject: [PATCH 72/75] IB/core: correctly handle rdma_rw_init_mrs() failure Function ib_create_qp() was failing to return an error when rdma_rw_init_mrs() fails, causing a crash further down in ib_create_qp() when trying to dereferece the qp pointer which was actually a negative errno. The crash: crash> log|grep BUG [ 136.458121] BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 crash> bt PID: 3736 TASK: ffff8808543215c0 CPU: 2 COMMAND: "kworker/u64:2" #0 [ffff88084d323340] machine_kexec at ffffffff8105fbb0 #1 [ffff88084d3233b0] __crash_kexec at ffffffff81116758 #2 [ffff88084d323480] crash_kexec at ffffffff8111682d #3 [ffff88084d3234b0] oops_end at ffffffff81032bd6 #4 [ffff88084d3234e0] no_context at ffffffff8106e431 #5 [ffff88084d323530] __bad_area_nosemaphore at ffffffff8106e610 #6 [ffff88084d323590] bad_area_nosemaphore at ffffffff8106e6f4 #7 [ffff88084d3235a0] __do_page_fault at ffffffff8106ebdc #8 [ffff88084d323620] do_page_fault at ffffffff8106f057 #9 [ffff88084d323660] page_fault at ffffffff816e3148 [exception RIP: ib_create_qp+427] RIP: ffffffffa02554fb RSP: ffff88084d323718 RFLAGS: 00010246 RAX: 0000000000000004 RBX: fffffffffffffff4 RCX: 000000018020001f RDX: ffff880830997fc0 RSI: 0000000000000001 RDI: ffff88085f407200 RBP: ffff88084d323778 R8: 0000000000000001 R9: ffffea0020bae210 R10: ffffea0020bae218 R11: 0000000000000001 R12: ffff88084d3237c8 R13: 00000000fffffff4 R14: ffff880859fa5000 R15: ffff88082eb89800 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #10 [ffff88084d323780] rdma_create_qp at ffffffffa0782681 [rdma_cm] #11 [ffff88084d3237b0] nvmet_rdma_create_queue_ib at ffffffffa07c43f3 [nvmet_rdma] #12 [ffff88084d323860] nvmet_rdma_alloc_queue at ffffffffa07c5ba9 [nvmet_rdma] #13 [ffff88084d323900] nvmet_rdma_queue_connect at ffffffffa07c5c96 [nvmet_rdma] #14 [ffff88084d323980] nvmet_rdma_cm_handler at ffffffffa07c6450 [nvmet_rdma] #15 [ffff88084d3239b0] iw_conn_req_handler at ffffffffa0787480 [rdma_cm] #16 [ffff88084d323a60] cm_conn_req_handler at ffffffffa0775f06 [iw_cm] #17 [ffff88084d323ab0] process_event at ffffffffa0776019 [iw_cm] #18 [ffff88084d323af0] cm_work_handler at ffffffffa0776170 [iw_cm] #19 [ffff88084d323cb0] process_one_work at ffffffff810a1483 #20 [ffff88084d323d90] worker_thread at ffffffff810a211d #21 [ffff88084d323ec0] kthread at ffffffff810a6c5c #22 [ffff88084d323f50] ret_from_fork at ffffffff816e1ebf Fixes: 632bc3f65081 ("IB/core, RDMA RW API: Do not exceed QP SGE send limit") Signed-off-by: Steve Wise Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1c6b2a4f50d49b..83687646da6823 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -843,7 +843,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (ret) { pr_err("failed to init MR pool ret= %d\n", ret); ib_destroy_qp(qp); - qp = ERR_PTR(ret); + return ERR_PTR(ret); } } From 086de575c14239f77ea0dbf2370738a105c753ea Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 16 Sep 2016 07:54:49 -0700 Subject: [PATCH 73/75] cxgb4: advertise support for FR_NSMR_TPTE_WR Query firmware for the FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR parameter. If it exists and is 1, then advertise support for FR_NSMR_TPTE_WR to the ULDs. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 1 + 4 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6b052891368738..f6099d0c63515f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -349,6 +349,7 @@ struct adapter_params { unsigned int max_ordird_qp; /* Max read depth per RDMA QP */ unsigned int max_ird_adapter; /* Max read depth per adapter */ + bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */ }; /* State needed to monitor the forward progress of SGE Ingress DMA activities diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0099a0cd53ea11..7e858b2768b71a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2517,6 +2517,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.max_ird_adapter = adap->params.max_ird_adapter; lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; lli.nodeid = dev_to_node(adap->pdev_dev); + lli.fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support; handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -4061,6 +4062,12 @@ static int adap_init0(struct adapter *adap) adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0); } + /* See if FW supports FW_RI_FR_NSMR_TPTE_WR work request */ + params[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + 1, params, val); + adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0); + /* * Get device capabilities so we can determine what resources we need * to manage. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index ab4037222f8dc0..93b1550ac7e6bb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -280,6 +280,7 @@ struct cxgb4_lld_info { unsigned int iscsi_llimit; /* chip's iscsi region llimit */ void **iscsi_ppm; /* iscsi page pod manager */ int nodeid; /* device numa node id */ + bool fr_nsmr_tpte_wr_support; /* FW supports FR_NSMR_TPTE_WR */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index d8f4adb9c7a60b..2f014ec5719311 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1120,6 +1120,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */ FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17, FW_PARAMS_PARAM_DEV_FWCACHE = 0x18, + FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, }; /* From 49b53a93a64ab0aaec10851b004297a3ac885433 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 16 Sep 2016 07:54:52 -0700 Subject: [PATCH 74/75] iw_cxgb4: add fast-path for small REG_MR operations When processing a REG_MR work request, if fw supports the FW_RI_NSMR_TPTE_WR work request, and if the page list for this registration is <= 2 pages, and the current state of the mr is INVALID, then use FW_RI_NSMR_TPTE_WR to pass down a fully populated TPTE for FW to write. This avoids FW having to do an async read of the TPTE blocking the SQ until the read completes. To know if the current MR state is INVALID or not, iw_cxgb4 must track the state of each fastreg MR. The c4iw_mr struct state is updated as REG_MR and LOCAL_INV WRs are posted and completed, when a reg_mr is destroyed, and when RECV completions are processed that include a local invalidation. This optimization increases small IO IOPS for both iSER and NVMF. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 17 +++++ drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 67 ++++++++++++++++--- drivers/infiniband/hw/cxgb4/t4.h | 4 +- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 12 ++++ drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 1 + 6 files changed, 92 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 812ab7278b8eec..bc522a9b2bfa4b 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -666,6 +666,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, return ret; } +static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); +} + /* * Get one cq entry from c4iw and map it to openib. * @@ -721,6 +733,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; + invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@ -746,6 +759,10 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) break; case FW_RI_FAST_REGISTER: wc->opcode = IB_WC_REG_MR; + + /* Invalidate the MR if the fastreg failed */ + if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) + invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 0b91b0f4df71cf..80e27749420a78 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -695,7 +695,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_NSMR; mhp->attr.stag = stag; - mhp->attr.state = 1; + mhp->attr.state = 0; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index edb1172b6f54c1..3467b906cff88e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -609,10 +609,42 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, + struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16) +{ + __be64 *p = (__be64 *)fr->pbl; + + fr->r2 = cpu_to_be32(0); + fr->stag = cpu_to_be32(mhp->ibmr.rkey); + + fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) | + FW_RI_TPTE_STAGSTATE_V(1) | + FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) | + FW_RI_TPTE_PDID_V(mhp->attr.pdid)); + fr->tpte.locread_to_qpid = cpu_to_be32( + FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) | + FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) | + FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12)); + fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V( + PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3)); + fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0); + fr->tpte.len_hi = cpu_to_be32(0); + fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length); + fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); + + p[0] = cpu_to_be64((u64)mhp->mpl[0]); + p[1] = cpu_to_be64((u64)mhp->mpl[1]); + + *len16 = DIV_ROUND_UP(sizeof(*fr), 16); +} + static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported) + struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, + bool dsgl_supported) { - struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; __be64 *p; int i; @@ -674,9 +706,12 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, - u8 *len16) +static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { + struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8); + + mhp->attr.state = 0; wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); @@ -816,18 +851,32 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_REG_MR: - fw_opcode = FW_RI_FR_NSMR_WR; + case IB_WR_REG_MR: { + struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); + swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + !mhp->attr.state && mhp->mpl_len <= 2) { + fw_opcode = FW_RI_FR_NSMR_TPTE_WR; + build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), + mhp, &len16); + } else { + fw_opcode = FW_RI_FR_NSMR_WR; + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), + mhp, &len16, + qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (err) + break; + } + mhp->attr.state = 1; break; + } case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; - err = build_inv_stag(wqe, wr, &len16); + err = build_inv_stag(qhp->rhp, wqe, wr, &len16); break; default: PDBG("%s post of type=%d TBD!\n", __func__, diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 6126bbe36095c2..b2bfbb1eef1a2a 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -95,6 +95,7 @@ union t4_wr { struct fw_ri_rdma_read_wr read; struct fw_ri_bind_mw_wr bind; struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; @@ -170,7 +171,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 nada1; + u32 stag; u16 nada2; u16 cidx; } scqe; @@ -232,6 +233,7 @@ struct t4_cqe { /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) +#define CQE_WRID_FR_STAG(x) (be32_to_cpu((x)->u.scqe.stag)) /* generic accessor macros */ #define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi)) diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 1e26669793c374..010c709ba3bb53 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -669,6 +669,18 @@ struct fw_ri_fr_nsmr_wr { #define FW_RI_FR_NSMR_WR_DCACPU_G(x) \ (((x) >> FW_RI_FR_NSMR_WR_DCACPU_S) & FW_RI_FR_NSMR_WR_DCACPU_M) +struct fw_ri_fr_nsmr_tpte_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u32 r2; + __u32 stag; + struct fw_ri_tpte tpte; + __u64 pbl[2]; +}; + struct fw_ri_inv_lstag_wr { __u8 opcode; __u8 flags; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 2f014ec5719311..985a521ac5114c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -100,6 +100,7 @@ enum fw_wr_opcodes { FW_RI_RECV_WR = 0x17, FW_RI_BIND_MW_WR = 0x18, FW_RI_FR_NSMR_WR = 0x19, + FW_RI_FR_NSMR_TPTE_WR = 0x20, FW_RI_INV_LSTAG_WR = 0x1a, FW_ISCSI_TX_DATA_WR = 0x45, FW_CRYPTO_LOOKASIDE_WR = 0X6d, From 2937f375751922ffce9ef1d5fa84491840b0c8e0 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 6 Oct 2016 16:22:37 -0400 Subject: [PATCH 75/75] staging/lustre: Disable InfiniBand support We changed one of the RDMA APIs and Lustre's InfiniBand transport has not been updated to match. Disabled it for now. Signed-off-by: Doug Ledford --- drivers/staging/lustre/lnet/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index 2b5930150cdafe..13b43278a38dc8 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -35,6 +35,7 @@ config LNET_SELFTEST config LNET_XPRT_IB tristate "LNET infiniband support" depends on LNET && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on BROKEN default LNET && INFINIBAND help This option allows the LNET users to use infiniband as an