Skip to content

Commit

Permalink
Update encap-already-in-use check to cover L3Outs and add doc (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
takishida authored Apr 29, 2024
1 parent dd9ee1e commit efbf284
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 43 deletions.
53 changes: 31 additions & 22 deletions aci-preupgrade-validation-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,48 +1169,57 @@ def encap_already_in_use_check(index, total_checks, **kwargs):
title = 'Encap Already In Use (F0467 encap-already-in-use)'
result = FAIL_O
msg = ''
headers = ["DN of Faulted EPG", "In Use by EPG", "Node", "Overlapping Encap(s)"]
headers = ["Faulted EPG/L3Out", "Node", "Port", "In Use Encap(s)", "In Use by EPG/L3Out"]
data = []
unformatted_headers = ['Fault Description']
unformatted_data = []
recommended_action = 'Resolve the overlapping encap configuration prior to upgrade'
print_title(title, index, total_checks)

desc_regex = r'Encap is already in use by (?P<inUseEpgStr>.+);'
nwissues_dn_regex = node_regex + r'/.*epp/fv-\[(?P<faultedEpgDn>.*)\]/node.*'
# <port> can be `ethX/X` or the name of I/F policy group
# <vlan> is not there for older versions
desc_regex = r'Configuration failed for (?P<failed>.+) node (?P<node>\d+) (?P<port>.+) due to .* Encap (\(vlan-(?P<vlan>\d+)\) )?is already in use by (?P<inuse>.+);'

faultInsts = icurl('class',
'faultInst.json?&query-target-filter=wcard(faultInst.descr,"encap-already-in-use")')
if faultInsts:
fvIfConns = icurl('class', 'fvIfConn.json')
for faultInst in faultInsts:
desc_array = re.search(desc_regex, faultInst['faultInst']['attributes']['descr'])

if desc_array:
inuse_epg_list = desc_array.group("inUseEpgStr").split(":")
in_use_epg_dn = "uni/tn-" + inuse_epg_list[0] + "/ap-" + inuse_epg_list[1] + "/epg-" + inuse_epg_list[2]

dn_array = re.search(nwissues_dn_regex, faultInst['faultInst']['attributes']['dn'])
faulted_epg_dn = dn_array.group("faultedEpgDn")
nodeId = dn_array.group("node")

'faultInst.json?query-target-filter=wcard(faultInst.descr,"encap-already-in-use")')
fvIfConns = []
for faultInst in faultInsts:
desc = re.search(desc_regex, faultInst['faultInst']['attributes']['descr'])
if desc:
failed_dn = desc.group("failed")
node_id = desc.group("node")
port_id = desc.group("port")
vlan_id = desc.group("vlan")
inuse_list = desc.group("inuse").split(":")
if len(inuse_list) == 3:
inuse_dn = "uni/tn-{0}/ap-{1}/epg-{2}".format(*inuse_list)
elif len(inuse_list) == 4:
inuse_dn = "uni/tn-{0}/out-{2}".format(*inuse_list)

# Get already-in-use encap(s) from fvIfConn when a fault doesn't include encap
if vlan_id is None:
faulted_epg_encaps = []
in_use_epg_encaps = []
if not fvIfConns:
fvIfConns = icurl('class', 'fvIfConn.json')
for fvIfConn in fvIfConns:
dn = fvIfConn['fvIfConn']['attributes']['dn']
encap = fvIfConn['fvIfConn']['attributes']['encap']
if (faulted_epg_dn in dn) and ("node-"+nodeId in dn):
if (failed_dn in dn) and ("node-"+node_id in dn):
if encap not in faulted_epg_encaps:
faulted_epg_encaps.append(encap)

if (in_use_epg_dn in dn) and ("node-"+nodeId in dn):
if (inuse_dn in dn) and ("node-"+node_id in dn):
if encap not in in_use_epg_encaps:
in_use_epg_encaps.append(encap)

overlapping_encaps = [x for x in in_use_epg_encaps if x in faulted_epg_encaps]
data.append([faulted_epg_dn, in_use_epg_dn, nodeId, ','.join(overlapping_encaps)])
else:
unformatted_data.append([faultInst['faultInst']['attributes']['descr']])
vlan_id = ",".join(overlapping_encaps)

data.append([failed_dn, node_id, port_id, vlan_id, inuse_dn])
else:
unformatted_data.append([faultInst['faultInst']['attributes']['descr']])

if not data and not unformatted_data:
result = PASS
print_result(title, result, msg, headers, data,
Expand Down
152 changes: 134 additions & 18 deletions docs/docs/validations.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,16 @@ Items | Faults | This Script
[Config On APIC Connected Port][f6] | F0467: port-configured-for-apic | :white_check_mark: | :white_check_mark: 6.0(1g) | :white_check_mark:
[L3 Port Config][f7] | F0467: port-configured-as-l2 | :white_check_mark: | :white_check_mark: 5.2(4d) | :white_check_mark:
[L2 Port Config][f8] | F0467: port-configured-as-l3 | :white_check_mark: | :white_check_mark: 5.2(4d) | :white_check_mark:
[Access (Untagged) Port Config][f9] | F0467: native-or-untagged-encap-failure | :white_check_mark: | :no_entry_sign: | :no_entry_sign:
[L3Out Subnets][f10] | F0467: prefix-entry-already-in-use | :white_check_mark: | :white_check_mark: 6.0(1g) | :white_check_mark:
[BD Subnets][f11] | F0469: duplicate-subnets-within-ctx | :white_check_mark: | :white_check_mark: 5.2(4d) | :white_check_mark:
[BD Subnets][f12] | F1425: subnet-overlap | :white_check_mark: | :white_check_mark: 5.2(4d) | :white_check_mark:
[VMM Domain Controller Status][f13] | F0130 | :white_check_mark: | :white_check_mark: 4.2(1) | :white_check_mark:
[VMM Domain LLDP/CDP Adjacency Status][f14] | F606391 | :white_check_mark: | :white_check_mark: 4.2(1) | :white_check_mark:
[Different infra VLAN via LLDP][f15] | F0454: infra-vlan-mismatch | :white_check_mark: | :white_check_mark: 4.2(4) | :white_check_mark:
[HW Programming Failure][f16] | F3544: L3Out Prefixes<br>F3545: Contracts | :white_check_mark: | :white_check_mark: 5.1(1) | :white_check_mark:
[Scalability (faults related to Capacity Dashboard)][f17] | TCA faults for eqptcapacityEntity | :white_check_mark: | :no_entry_sign: | :white_check_mark:
[Access (Untagged) Port Config][f9] | F0467: native-or-untagged-encap-failure | :white_check_mark: | :no_entry_sign: | :no_entry_sign:
[Encap Already in Use][f10] | F0467: encap-already-in-use | :white_check_mark: | :no_entry_sign: | :no_entry_sign:
[L3Out Subnets][f11] | F0467: prefix-entry-already-in-use | :white_check_mark: | :white_check_mark: 6.0(1g) | :white_check_mark:
[BD Subnets][f12] | F0469: duplicate-subnets-within-ctx | :white_check_mark: | :white_check_mark: 5.2(4d) | :white_check_mark:
[BD Subnets][f13] | F1425: subnet-overlap | :white_check_mark: | :white_check_mark: 5.2(4d) | :white_check_mark:
[VMM Domain Controller Status][f14] | F0130 | :white_check_mark: | :white_check_mark: 4.2(1) | :white_check_mark:
[VMM Domain LLDP/CDP Adjacency Status][f15] | F606391 | :white_check_mark: | :white_check_mark: 4.2(1) | :white_check_mark:
[Different infra VLAN via LLDP][f16] | F0454: infra-vlan-mismatch | :white_check_mark: | :white_check_mark: 4.2(4) | :white_check_mark:
[HW Programming Failure][f17] | F3544: L3Out Prefixes<br>F3545: Contracts | :white_check_mark: | :white_check_mark: 5.1(1) | :white_check_mark:
[Scalability (faults related to Capacity Dashboard)][f18] | TCA faults for eqptcapacityEntity | :white_check_mark: | :no_entry_sign: | :white_check_mark:

[f1]: #apic-disk-space-usage
[f2]: #standby-apic-disk-space-usage
Expand All @@ -79,16 +80,15 @@ Items | Faults | This Script
[f7]: #l2l3-port-config
[f8]: #l2l3-port-config
[f9]: #access-untagged-port-config
[f10]: #l3out-subnets
[f11]: #bd-subnets
[f10]: #encap-already-in-use
[f11]: #l3out-subnets
[f12]: #bd-subnets
[f13]: #vmm-domain-controller-status
[f14]: #vmm-domain-lldpcdp-adjacency-status
[f15]: #different-infra-vlan-via-lldp
[f16]: #hw-programming-failure
[f17]: #scalability-faults-related-to-capacity-dashboard


[f13]: #bd-subnets
[f14]: #vmm-domain-controller-status
[f15]: #vmm-domain-lldpcdp-adjacency-status
[f16]: #different-infra-vlan-via-lldp
[f17]: #hw-programming-failure
[f18]: #scalability-faults-related-to-capacity-dashboard


### Configuration Checks
Expand Down Expand Up @@ -703,6 +703,7 @@ The script verifies these faults to ensure that a port is not configured as part
```
apic1# moquery -c faultInst -x 'query-target-filter=wcard(faultInst.changeSet,"native-or-untagged-encap-failure")'
Total Objects shown: 1

# fault.Inst
code : F0467
ack : no
Expand Down Expand Up @@ -738,6 +739,121 @@ The script verifies these faults to ensure that a port is not configured as part
Please note that this behavior has recently changed. With the new behavior, the APIC no longer allows two different access encapsulations on the same port; such a configuration is rejected through policy distributor validation. This change has been documented in CSCwj69435.


### Encap Already in Use

This is another fault in the F0467 fault code family that you should check before an upgrade. This fault indicates that an interface configuration under an EPG, or an SVI configuration for an L3Out, has failed because the VLAN encapsulation for the interface is already used by another interface on the same switch for a different purpose. After an upgrade, it’s possible that the previously working configuration will break if this faulty policy is deployed first after the switch reloads.

It is critical that you resolve these issues before the upgrade to prevent any unexpected outages when the switches are upgraded. The VLAN encapsulation on the interface that the fault is raised on should either be corrected or deleted in order to clear the fault. You can run the moquery in the example below on the CLI of any Cisco APIC to check if these faults exist on the system. The faults are visible within the GUI as well.

!!! example "Fault Example (F0467: encap-already-in-use)"
The following shows three examples.

The first fault is for the interface configuration under the EPG `EPG1-2` in application profile `AP1` in tenant `TK` on node `101` interface `eth1/4` with VLAN `2011`. The fault description indicates that VLAN `2011` is already used by EPG `EPG1-1` in application profile `AP1` in tenant `TK`.

The second fault is for the SVI configuration under L3Out `BGP` in tenant `TK` on node `103` interface `eth1/11` with VLAN `2013`. The fault description indicates that VLAN `2013` is already used by EPG `EPG1-3` in application profile `AP1` in tenant `TK`.

The third fault is for the interface configuration under the EPG `EPG3-1` in application profile `AP1` in tenant `TK` on node `103` interface `eth1/1` with VLAN `2051`. The fault description indicates that VLAN `2051` is already used by L3Out `BGP` in tenant `TK`.

Note that on older versions the fault description may not include the VLAN, i.e. it reads `Encap is already in use by ...` instead of `Encap (vlan-2011) is already in use by ...`.
```
admin@apic1:~> moquery -c faultInst -x 'query-target-filter=wcard(faultInst.descr,"encap-already-in-use")'
Total Objects shown: 3

# fault.Inst
code : F0467
ack : no
alert : no
annotation :
cause : configuration-failed
changeSet : configQual:encap-already-in-use, configSt:failed-to-apply, debugMessage:encap-already-in-use: Encap (vlan-2011) is already in use by TK:AP1:EPG1-1;, temporaryError:no
childAction :
created : 2024-04-19T21:02:20.878-07:00
delegated : yes
descr : Configuration failed for uni/tn-TK/ap-AP1/epg-EPG1-2 node 101 eth1/4 due to Encap Already Used in Another EPG, debug message: encap-already-in-use: Encap (vlan-2011) is already in use by TK:AP1:EPG1-1;
dn : topology/pod-1/node-101/local/svc-policyelem-id-0/uni/epp/fv-[uni/tn-TK/ap-AP1/epg-EPG1-2]/node-101/stpathatt-[eth1/4]/nwissues/fault-F0467
domain : tenant
extMngdBy : undefined
highestSeverity : minor
lastTransition : 2024-04-19T21:04:25.300-07:00
lc : raised
modTs : never
occur : 1
origSeverity : minor
prevSeverity : minor
rn : fault-F0467
rule : fv-nw-issues-config-failed
severity : minor
status :
subject : management
title :
type : config
uid :
userdom : all

# fault.Inst
code : F0467
ack : no
alert : no
annotation :
cause : configuration-failed
changeSet : configQual:encap-already-in-use, configSt:failed-to-apply, debugMessage:encap-already-in-use: Encap (vlan-2013) is already in use by TK:AP1:EPG1-3;, temporaryError:no
childAction :
created : 2024-04-19T21:59:31.948-07:00
delegated : yes
descr : Configuration failed for uni/tn-TK/out-BGP node 103 eth1/11 due to Encap Already Used in Another EPG, debug message: encap-already-in-use: Encap (vlan-2013) is already in use by TK:AP1:EPG1-3;
dn : topology/pod-2/node-103/local/svc-policyelem-id-0/resPolCont/rtdOutCont/rtdOutDef-[uni/tn-TK/out-BGP]/node-103/stpathatt-[eth1/11]/nwissues/fault-F0467
domain : tenant
extMngdBy : undefined
highestSeverity : minor
lastTransition : 2024-04-19T21:59:31.948-07:00
lc : soaking
modTs : never
occur : 1
origSeverity : minor
prevSeverity : minor
rn : fault-F0467
rule : fv-nw-issues-config-failed
severity : minor
status :
subject : management
title :
type : config
uid :
userdom : all

# fault.Inst
code : F0467
ack : no
alert : no
annotation :
cause : configuration-failed
changeSet : configQual:encap-already-in-use, configSt:failed-to-apply, debugMessage:encap-already-in-use: Encap (vlan-2051) is already in use by TK:VRFA:l3out-BGP:vlan-2051;, temporaryError:no
childAction :
created : 2024-04-19T21:58:02.758-07:00
delegated : yes
descr : Configuration failed for uni/tn-TK/ap-AP1/epg-EPG3-1 node 103 eth1/1 due to Encap Already Used in Another EPG, debug message: encap-already-in-use: Encap (vlan-2051) is already in use by TK:VRFA:l3out-BGP:vlan-2051;
dn : topology/pod-2/node-103/local/svc-policyelem-id-0/uni/epp/fv-[uni/tn-TK/ap-AP1/epg-EPG3-1]/node-103/stpathatt-[eth1/1]/nwissues/fault-F0467
domain : tenant
extMngdBy : undefined
highestSeverity : minor
lastTransition : 2024-04-19T21:58:02.758-07:00
lc : soaking
modTs : never
occur : 1
origSeverity : minor
prevSeverity : minor
rn : fault-F0467
rule : fv-nw-issues-config-failed
severity : minor
status :
subject : management
title :
type : config
uid :
userdom : all
```


### L3Out Subnets

This is another fault in the F0467 fault code family that you should check before an upgrade. This fault indicates that an external EPG defined under a Layer3 Out (L3Out) has a subnet with the **External Subnet for the External EPG** scope configured that overlaps with another L3Out external EPG in the same VRF. After an upgrade, it’s possible that the previously working configuration will break if this faulty policy is deployed first after the switch reloads.
Expand Down
1 change: 0 additions & 1 deletion tests/encap_already_in_use_check/faultInst-encap-neg.json

This file was deleted.

101 changes: 101 additions & 0 deletions tests/encap_already_in_use_check/faultInst-new-version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
[
{
"faultInst": {
"attributes": {
"ack": "no",
"alert": "no",
"cause": "configuration-failed",
"changeSet": "configQual:encap-already-in-use, configSt:failed-to-apply, debugMessage:encap-already-in-use: Encap (vlan-2011) is already in use by TK:AP1:EPG1-1;, temporaryError:no",
"childAction": "",
"code": "F0467",
"created": "2024-04-19T21:02:20.878-07:00",
"delegated": "yes",
"descr": "Configuration failed for uni/tn-TK/ap-AP1/epg-EPG1-2 node 101 eth1/4 due to Encap Already Used in Another EPG, debug message: encap-already-in-use: Encap (vlan-2011) is already in use by TK:AP1:EPG1-1;",
"dn": "topology/pod-1/node-101/local/svc-policyelem-id-0/uni/epp/fv-[uni/tn-TK/ap-AP1/epg-EPG1-2]/node-101/stpathatt-[eth1/4]/nwissues/fault-F0467",
"domain": "tenant",
"extMngdBy": "undefined",
"highestSeverity": "minor",
"lastTransition": "2024-04-19T21:04:25.300-07:00",
"lc": "raised",
"modTs": "never",
"occur": "1",
"origSeverity": "minor",
"prevSeverity": "minor",
"rn": "fault-F0467",
"rule": "fv-nw-issues-config-failed",
"severity": "minor",
"status": "",
"subject": "management",
"title": "",
"type": "config",
"userdom": "all"
}
}
},
{
"faultInst": {
"attributes": {
"ack": "no",
"alert": "no",
"cause": "configuration-failed",
"changeSet": "configQual:encap-already-in-use, configSt:failed-to-apply, debugMessage:encap-already-in-use: Encap (vlan-2013) is already in use by TK:AP1:EPG1-3;, temporaryError:no",
"childAction": "",
"code": "F0467",
"created": "2024-04-19T21:59:31.948-07:00",
"delegated": "yes",
"descr": "Configuration failed for uni/tn-TK/out-BGP node 103 eth1/11 due to Encap Already Used in Another EPG, debug message: encap-already-in-use: Encap (vlan-2013) is already in use by TK:AP1:EPG1-3;",
"dn": "topology/pod-2/node-103/local/svc-policyelem-id-0/resPolCont/rtdOutCont/rtdOutDef-[uni/tn-TK/out-BGP]/node-103/stpathatt-[eth1/11]/nwissues/fault-F0467",
"domain": "tenant",
"extMngdBy": "undefined",
"highestSeverity": "minor",
"lastTransition": "2024-04-19T22:01:52.348-07:00",
"lc": "raised",
"modTs": "never",
"occur": "1",
"origSeverity": "minor",
"prevSeverity": "minor",
"rn": "fault-F0467",
"rule": "fv-nw-issues-config-failed",
"severity": "minor",
"status": "",
"subject": "management",
"title": "",
"type": "config",
"userdom": "all"
}
}
},
{
"faultInst": {
"attributes": {
"ack": "no",
"alert": "no",
"cause": "configuration-failed",
"changeSet": "configQual:encap-already-in-use, configSt:failed-to-apply, debugMessage:encap-already-in-use: Encap (vlan-2051) is already in use by TK:VRFA:l3out-BGP:vlan-2051;, temporaryError:no",
"childAction": "",
"code": "F0467",
"created": "2024-04-19T21:58:02.758-07:00",
"delegated": "yes",
"descr": "Configuration failed for uni/tn-TK/ap-AP1/epg-EPG3-1 node 103 eth1/1 due to Encap Already Used in Another EPG, debug message: encap-already-in-use: Encap (vlan-2051) is already in use by TK:VRFA:l3out-BGP:vlan-2051;",
"dn": "topology/pod-2/node-103/local/svc-policyelem-id-0/uni/epp/fv-[uni/tn-TK/ap-AP1/epg-EPG3-1]/node-103/stpathatt-[eth1/1]/nwissues/fault-F0467",
"domain": "tenant",
"extMngdBy": "undefined",
"highestSeverity": "minor",
"lastTransition": "2024-04-19T22:00:22.338-07:00",
"lc": "raised",
"modTs": "never",
"occur": "1",
"origSeverity": "minor",
"prevSeverity": "minor",
"rn": "fault-F0467",
"rule": "fv-nw-issues-config-failed",
"severity": "minor",
"status": "",
"subject": "management",
"title": "",
"type": "config",
"userdom": "all"
}
}
}
]
Loading

0 comments on commit efbf284

Please sign in to comment.