In [4]:
import pandas as pd
# Load the semicolon-separated CSV export into a DataFrame.
df = pd.read_csv(
    "001_12ib_2025m12_20260211-232655.csv",
    sep=";",
)
In [5]:
# Preview the first five rows to check that the file parsed as expected.
df.head(5)
Out[5]:
Month Number of passengers Freight and mail total, tonne Number of passengers, cumulative from the beginning of the year Freight and mail, cumulative from the beginning of the year, tonnes
0 2019M01 1969169 16265 1969169 16265
1 2019M02 1951842 14872 3921011 31136
2 2019M03 2203647 18561 6124658 49698
3 2019M04 2135867 18412 8260525 68110
4 2019M05 2269401 20031 10529926 88141
In [6]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 5 columns):
 #   Column                                                               Non-Null Count  Dtype 
---  ------                                                               --------------  ----- 
 0   Month                                                                84 non-null     object
 1   Number of passengers                                                 84 non-null     int64 
 2   Freight and mail total, tonne                                        84 non-null     int64 
 3   Number of passengers, cumulative from the beginning of the year      84 non-null     int64 
 4   Freight and mail, cumulative from the beginning of the year, tonnes  84 non-null     int64 
dtypes: int64(4), object(1)
memory usage: 3.4+ KB
In [7]:
# Names of the numeric columns used for normalization, statistics,
# plotting and export (everything except "Month").
columns = [
    "Number of passengers",
    "Freight and mail total, tonne",
    "Number of passengers, cumulative from the beginning of the year",
    "Freight and mail, cumulative from the beginning of the year, tonnes",
]
In [8]:
# Min-max normalize the numeric columns so that every value is scaled
# between 0.0 and 1.0. The per-column minimum and range are computed once
# and reused.
col_min = df[columns].min()
col_range = df[columns].max() - col_min
# A constant column has a zero range; dividing by it would produce NaN (0/0),
# which a later CSV export would write as an empty field. Divide by 1 instead
# so that such a column maps to 0.0.
df[columns] = (df[columns] - col_min) / col_range.replace(0, 1)
In [13]:
# Summary statistics of the selected columns, rounded to two decimals.
df.loc[:, columns].describe().round(decimals=2)
Out[13]:
Number of passengers Freight and mail total, tonne Number of passengers, cumulative from the beginning of the year Freight and mail, cumulative from the beginning of the year, tonnes
count 84.00 84.00 84.00 84.00
mean 0.56 0.51 0.32 0.40
std 0.29 0.18 0.24 0.25
min 0.00 0.00 0.00 0.00
25% 0.36 0.43 0.14 0.19
50% 0.65 0.50 0.24 0.38
75% 0.74 0.58 0.49 0.59
max 1.00 1.00 1.00 1.00
In [14]:
# Pairwise Pearson correlation between the selected columns.
df[columns].corr(method="pearson")
Out[14]:
Number of passengers Freight and mail total, tonne Number of passengers, cumulative from the beginning of the year Freight and mail, cumulative from the beginning of the year, tonnes
Number of passengers 1.000000 0.642697 0.592027 0.239162
Freight and mail total, tonne 0.642697 1.000000 0.426754 0.360766
Number of passengers, cumulative from the beginning of the year 0.592027 0.426754 1.000000 0.835316
Freight and mail, cumulative from the beginning of the year, tonnes 0.239162 0.360766 0.835316 1.000000
In [17]:
# Line plot of the selected columns against the row index.
df[columns].plot.line(figsize=(12, 6))
Out[17]:
<Axes: >
No description has been provided for this image
In [12]:
# Export the selected columns to a PureData friendly text file:
# one row per line, fields separated by spaces, no header line, the row
# index written as the first field, and every line terminated by ";".
df[columns].to_csv(
    "passengers",
    sep=" ",
    header=False,
    index=True,
    lineterminator=";\n",
)