In [2]:
# if xgboost is needed
# import sys
# !{sys.executable} -m pip install xgboost
Collecting xgboost
  Downloading xgboost-1.7.3-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl (1.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 14.7 MB/s eta 0:00:0000:0100:01
Requirement already satisfied: scipy in /usr/local/lib/python3.10/site-packages (from xgboost) (1.9.2)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/site-packages (from xgboost) (1.23.4)
Installing collected packages: xgboost
Successfully installed xgboost-1.7.3

[notice] A new release of pip available: 22.2.2 -> 23.0
[notice] To update, run: python3.10 -m pip install --upgrade pip
In [3]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier

from pandas import Series,DataFrame
from nfstream import NFStreamer

Import pcap data¶

In [4]:
normal_df = NFStreamer(source="/Users/z/Python/atroposinsights-site/ai-site/assets/notebooks/normal.pcap",
                         n_dissections=0,  
                         statistical_analysis=True).to_pandas()
In [5]:
attack_df = NFStreamer(source="/Users/z/Python/atroposinsights-site/ai-site/assets/notebooks/attack.pcap",
                         n_dissections=0,  
                         statistical_analysis=True).to_pandas()
In [6]:
normal_df
Out[6]:
id expiration_id src_ip src_mac src_oui src_port dst_ip dst_mac dst_oui dst_port ... src2dst_rst_packets src2dst_fin_packets dst2src_syn_packets dst2src_cwr_packets dst2src_ece_packets dst2src_urg_packets dst2src_ack_packets dst2src_psh_packets dst2src_rst_packets dst2src_fin_packets
0 0 0 84.3.251.103 fa:00:bc:90:d7:fa fa:00:bc 36925 84.3.251.102 0a:fe:ec:47:74:fb 0a:fe:ec 502 ... 0 1 2 0 0 0 6 1 1 1
1 1 0 84.3.251.105 fe:bb:16:7b:c3:27 fe:bb:16 42083 84.3.251.103 fa:00:bc:90:d7:fa fa:00:bc 502 ... 0 1 2 0 0 0 5 1 0 1
2 2 0 84.3.251.104 4a:35:83:e0:3d:a4 4a:35:83 35023 84.3.251.102 0a:fe:ec:47:74:fb 0a:fe:ec 502 ... 0 1 2 0 0 0 5 1 0 1
3 3 0 84.3.251.102 0a:fe:ec:47:74:fb 0a:fe:ec 52801 84.3.251.101 e6:3f:ac:c9:a8:8c e6:3f:ac 502 ... 0 1 2 0 0 0 5 1 0 1
4 4 0 84.3.251.101 e6:3f:ac:c9:a8:8c e6:3f:ac 37867 84.3.251.18 00:80:f4:03:fb:12 00:80:f4 502 ... 0 1 2 0 0 0 6 1 2 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
262 262 0 84.3.251.103 fa:00:bc:90:d7:fa fa:00:bc 34935 84.3.251.102 0a:fe:ec:47:74:fb 0a:fe:ec 502 ... 0 1 2 0 0 0 5 1 0 1
263 263 0 84.3.251.101 e6:3f:ac:c9:a8:8c e6:3f:ac 40745 84.3.251.18 00:80:f4:03:fb:12 00:80:f4 502 ... 0 1 2 0 0 0 6 1 2 1
264 264 0 84.3.251.101 e6:3f:ac:c9:a8:8c e6:3f:ac 60719 84.3.251.18 00:80:f4:03:fb:12 00:80:f4 502 ... 0 1 2 0 0 0 6 1 2 1
265 265 0 84.3.251.101 e6:3f:ac:c9:a8:8c e6:3f:ac 36137 84.3.251.18 00:80:f4:03:fb:12 00:80:f4 502 ... 0 1 2 0 0 0 6 1 2 1
266 266 0 84.3.251.104 4a:35:83:e0:3d:a4 4a:35:83 34199 84.3.251.102 0a:fe:ec:47:74:fb 0a:fe:ec 502 ... 0 1 2 0 0 0 5 1 0 1

267 rows × 77 columns

Label and combine the dataframe¶

In [7]:
normal_df['Label'] = 0
attack_df['Label'] = 1
In [8]:
combined_df = pd.concat([normal_df, attack_df], axis = 0)

Get dummies for categorical features & remove NaN¶

In [9]:
combined_df = pd.get_dummies(combined_df, columns = ['src_ip','src_mac','src_oui','dst_ip','dst_mac','dst_oui'], drop_first=True)
In [10]:
# Check for NaN
combined_df.replace([np.inf, -np.inf], pd.isna, inplace=True)
combined_df = combined_df.dropna(axis=1, how='all')
rows = combined_df.sum().sum()
nans = combined_df.isnull().sum().sum()
print('Total Rows: ', rows)
print('Detected NaN: ', nans, (nans/rows*100), '%')
Total Rows:  5146501725905314.0
Detected NaN:  0 0.0 %

Set X & y¶

In [11]:
X = combined_df.drop(['Label'], axis=1)
y = combined_df['Label']
In [12]:
X.shape  # 530 rows, 101 columns
Out[12]:
(530, 101)

Here we train XGBoost¶

In [17]:
xbg_reg = XGBClassifier().fit(X, y)

Which features were kept?¶

In [18]:
f_importance = xbg_reg.get_booster().get_score(importance_type='gain')
f_importance
Out[18]:
{'bidirectional_first_seen_ms': 74.43582153320312}
In [ ]:
 
In [ ]: