import pandas as pd
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from pandas import Series,DataFrame
from nfstream import NFStreamer
# Both captures are converted to flow records with identical NFStreamer settings.
# statistical_analysis=True is what yields the per-flow statistics columns
# (packet-size and inter-arrival-time features) used further down.
# NOTE(review): per the NFStream docs n_dissections=0 turns off L7 dissection -- confirm.
streamer_options = {"n_dissections": 0, "statistical_analysis": True}

normal_stream = NFStreamer(
    source="/Users/z/Python/atroposinsights-site/ai-site/assets/notebooks/normal.pcap",
    **streamer_options,
)
normal_df = normal_stream.to_pandas()

attack_stream = NFStreamer(
    source="/Users/z/Python/atroposinsights-site/ai-site/assets/notebooks/attack.pcap",
    **streamer_options,
)
attack_df = attack_stream.to_pandas()

# Display the flows extracted from the normal capture.
normal_df
| | id | expiration_id | src_ip | src_mac | src_oui | src_port | dst_ip | dst_mac | dst_oui | dst_port | ... | src2dst_rst_packets | src2dst_fin_packets | dst2src_syn_packets | dst2src_cwr_packets | dst2src_ece_packets | dst2src_urg_packets | dst2src_ack_packets | dst2src_psh_packets | dst2src_rst_packets | dst2src_fin_packets |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 84.3.251.103 | fa:00:bc:90:d7:fa | fa:00:bc | 36925 | 84.3.251.102 | 0a:fe:ec:47:74:fb | 0a:fe:ec | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 1 | 1 |
| 1 | 1 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 53511 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| 2 | 2 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 38055 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| 3 | 3 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 34677 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| 4 | 4 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 55827 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 262 | 262 | 0 | 84.3.251.103 | fa:00:bc:90:d7:fa | fa:00:bc | 34935 | 84.3.251.102 | 0a:fe:ec:47:74:fb | 0a:fe:ec | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 5 | 1 | 0 | 1 |
| 263 | 263 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 40745 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| 264 | 264 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 60719 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| 265 | 265 | 0 | 84.3.251.101 | e6:3f:ac:c9:a8:8c | e6:3f:ac | 36137 | 84.3.251.18 | 00:80:f4:03:fb:12 | 00:80:f4 | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 6 | 1 | 2 | 1 |
| 266 | 266 | 0 | 84.3.251.104 | 4a:35:83:e0:3d:a4 | 4a:35:83 | 34199 | 84.3.251.102 | 0a:fe:ec:47:74:fb | 0a:fe:ec | 502 | ... | 0 | 1 | 2 | 0 | 0 | 0 | 5 | 1 | 0 | 1 |
267 rows × 77 columns
# Tag each capture with its class (0 = normal capture, 1 = attack capture) and
# stack the two frames into one dataset.
normal_df['Label'] = 0
attack_df['Label'] = 1
# ignore_index=True rebuilds a unique row index; each source frame carries its
# own row labels, which would otherwise be duplicated after concatenation.
combined_df = pd.concat([normal_df, attack_df], axis=0, ignore_index=True)

# One-hot encode the address columns; drop_first removes one level per column
# so the dummy columns are not perfectly collinear.
combined_df = pd.get_dummies(
    combined_df,
    columns=['src_ip', 'src_mac', 'src_oui', 'dst_ip', 'dst_mac', 'dst_oui'],
    drop_first=True,
)

# Check for NaN
# Infinite values are mapped to NaN so they are counted as missing below.
# The replacement value has to be np.nan (pd.isna is a function, not a value).
combined_df = combined_df.replace([np.inf, -np.inf], np.nan)
# Columns that contain no data at all are removed.
combined_df = combined_df.dropna(axis=1, how='all')

# Row count and NaN share are computed from the frame's dimensions, not from
# the sum of its cell values.
rows = len(combined_df)
total_cells = combined_df.size
nans = combined_df.isnull().sum().sum()
nan_pct = (nans / total_cells * 100) if total_cells else 0.0
print('Total Rows: ', rows)
print('Detected NaN: ', nans, nan_pct, '%')
Total Rows: 5146501725905314.0 Detected NaN: 0 0.0 %
# Split the dataset into the target vector (the 'Label' column) and the
# feature matrix (every remaining column).
y = combined_df['Label']
X = combined_df.drop(columns=['Label'])
X.shape # 530 rows, 101 columns
(530, 101)
# Lift the display row limit so complete DataFrames are shown.
pd.set_option('display.max_rows', None)

# Base estimator for the elimination: logistic regression with default settings.
# NOTE(review): X is passed in unscaled; coefficient-based rankings may be
# dominated by feature magnitude -- confirm whether scaling is wanted here.
model = LogisticRegression()

# Recursive feature elimination: remove one feature per round (step=1) until
# the 20 top-ranked features remain.
rfe = RFE(estimator=model, n_features_to_select=20, step=1)
fit = rfe.fit(X, y)
# Collect the RFE outcome into one table: one row per feature column of X with
# its selection flag (rfe.support_) and its elimination rank (rfe.ranking_).
df_RFE_results = pd.DataFrame(
    [
        {
            'Feature_names': feature_name,
            'Selected': is_selected,
            'RFE_ranking': rank,
        }
        for feature_name, is_selected, rank in zip(X.columns, rfe.support_, rfe.ranking_)
    ]
)
# Display the features ordered by rank (rank 1 = selected features).
df_RFE_results.sort_values(by=['RFE_ranking'])
| | Feature_names | Selected | RFE_ranking |
|---|---|---|---|
| 13 | src2dst_first_seen_ms | True | 1 |
| 18 | dst2src_first_seen_ms | True | 1 |
| 17 | src2dst_bytes | True | 1 |
| 44 | dst2src_mean_piat_ms | True | 1 |
| 15 | src2dst_duration_ms | True | 1 |
| 14 | src2dst_last_seen_ms | True | 1 |
| 37 | bidirectional_stddev_piat_ms | True | 1 |
| 12 | bidirectional_bytes | True | 1 |
| 42 | src2dst_max_piat_ms | True | 1 |
| 19 | dst2src_last_seen_ms | True | 1 |
| 10 | bidirectional_duration_ms | True | 1 |
| 8 | bidirectional_first_seen_ms | True | 1 |
| 45 | dst2src_stddev_piat_ms | True | 1 |
| 46 | dst2src_max_piat_ms | True | 1 |
| 22 | dst2src_bytes | True | 1 |
| 41 | src2dst_stddev_piat_ms | True | 1 |
| 2 | src_port | True | 1 |
| 38 | bidirectional_max_piat_ms | True | 1 |
| 9 | bidirectional_last_seen_ms | True | 1 |
| 20 | dst2src_duration_ms | True | 1 |
| 40 | src2dst_mean_piat_ms | False | 2 |
| 36 | bidirectional_mean_piat_ms | False | 3 |
| 0 | id | False | 4 |
| 3 | dst_port | False | 5 |
| 52 | bidirectional_psh_packets | False | 6 |
| 51 | bidirectional_ack_packets | False | 7 |
| 11 | bidirectional_packets | False | 8 |
| 60 | src2dst_psh_packets | False | 9 |
| 68 | dst2src_psh_packets | False | 10 |
| 67 | dst2src_ack_packets | False | 11 |
| 59 | src2dst_ack_packets | False | 12 |
| 21 | dst2src_packets | False | 13 |
| 16 | src2dst_packets | False | 14 |
| 30 | src2dst_max_ps | False | 15 |
| 26 | bidirectional_max_ps | False | 16 |
| 28 | src2dst_mean_ps | False | 17 |
| 34 | dst2src_max_ps | False | 18 |
| 24 | bidirectional_mean_ps | False | 19 |
| 27 | src2dst_min_ps | False | 20 |
| 32 | dst2src_mean_ps | False | 21 |
| 31 | dst2src_min_ps | False | 22 |
| 23 | bidirectional_min_ps | False | 23 |
| 43 | dst2src_min_piat_ms | False | 24 |
| 29 | src2dst_stddev_ps | False | 25 |
| 25 | bidirectional_stddev_ps | False | 26 |
| 4 | protocol | False | 27 |
| 47 | bidirectional_syn_packets | False | 28 |
| 5 | ip_version | False | 29 |
| 53 | bidirectional_rst_packets | False | 30 |
| 69 | dst2src_rst_packets | False | 31 |
| 55 | src2dst_syn_packets | False | 32 |
| 63 | dst2src_syn_packets | False | 33 |
| 39 | src2dst_min_piat_ms | False | 34 |
| 33 | dst2src_stddev_ps | False | 35 |
| 80 | src_mac_e6:3f:ac:c9:a8:8c | False | 36 |
| 86 | src_oui_e6:3f:ac | False | 37 |
| 91 | dst_ip_84.3.251.18 | False | 38 |
| 54 | bidirectional_fin_packets | False | 39 |
| 62 | src2dst_fin_packets | False | 40 |
| 70 | dst2src_fin_packets | False | 41 |
| 35 | bidirectional_min_piat_ms | False | 42 |
| 89 | dst_ip_84.3.251.102 | False | 43 |
| 93 | dst_mac_0a:fe:ec:47:74:fb | False | 44 |
| 97 | dst_oui_0a:fe:ec | False | 45 |
| 72 | src_ip_84.3.251.103 | False | 46 |
| 87 | src_oui_fa:00:bc | False | 47 |
| 81 | src_mac_fa:00:bc:90:d7:fa | False | 48 |
| 90 | dst_ip_84.3.251.103 | False | 49 |
| 96 | dst_mac_fa:00:bc:90:d7:fa | False | 50 |
| 100 | dst_oui_fa:00:bc | False | 51 |
| 74 | src_ip_84.3.251.105 | False | 52 |
| 88 | src_oui_fe:bb:16 | False | 53 |
| 82 | src_mac_fe:bb:16:7b:c3:27 | False | 54 |
| 78 | src_mac_4a:35:83:e0:3d:a4 | False | 55 |
| 73 | src_ip_84.3.251.104 | False | 56 |
| 84 | src_oui_4a:35:83 | False | 57 |
| 95 | dst_mac_e6:3f:ac:c9:a8:8c | False | 58 |
| 99 | dst_oui_e6:3f:ac | False | 59 |
| 77 | src_mac_0a:fe:ec:47:74:fb | False | 60 |
| 83 | src_oui_0a:fe:ec | False | 61 |
| 71 | src_ip_84.3.251.102 | False | 62 |
| 85 | src_oui_74:46:a0 | False | 63 |
| 79 | src_mac_74:46:a0:bd:a7:1b | False | 64 |
| 76 | src_ip_84.3.251.20 | False | 65 |
| 92 | dst_ip_84.3.251.20 | False | 66 |
| 94 | dst_mac_74:46:a0:bd:a7:1b | False | 67 |
| 75 | src_ip_84.3.251.18 | False | 68 |
| 98 | dst_oui_74:46:a0 | False | 69 |
| 66 | dst2src_urg_packets | False | 70 |
| 7 | tunnel_id | False | 71 |
| 6 | vlan_id | False | 72 |
| 65 | dst2src_ece_packets | False | 73 |
| 64 | dst2src_cwr_packets | False | 74 |
| 58 | src2dst_urg_packets | False | 75 |
| 57 | src2dst_ece_packets | False | 76 |
| 56 | src2dst_cwr_packets | False | 77 |
| 1 | expiration_id | False | 78 |
| 48 | bidirectional_cwr_packets | False | 79 |
| 61 | src2dst_rst_packets | False | 80 |
| 49 | bidirectional_ece_packets | False | 81 |
| 50 | bidirectional_urg_packets | False | 82 |