In [4]:
import pandas as pd
# Load the semicolon-separated monthly passenger/freight export into a DataFrame.
df = pd.read_csv(
    "001_12ib_2025m12_20260211-232655.csv",
    sep=";",
)
In [5]:
# Preview the first five rows to check that the file parsed as expected
df.head(n=5)
Out[5]:
| | Month | Number of passengers | Freight and mail total, tonne | Number of passengers, cumulative from the beginning of the year | Freight and mail, cumulative from the beginning of the year, tonnes |
|---|---|---|---|---|---|
| 0 | 2019M01 | 1969169 | 16265 | 1969169 | 16265 |
| 1 | 2019M02 | 1951842 | 14872 | 3921011 | 31136 |
| 2 | 2019M03 | 2203647 | 18561 | 6124658 | 49698 |
| 3 | 2019M04 | 2135867 | 18412 | 8260525 | 68110 |
| 4 | 2019M05 | 2269401 | 20031 | 10529926 | 88141 |
In [6]:
# Print a summary of the frame: index range, column names, non-null counts and dtypes
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 5 columns):
 #   Column                                                               Non-Null Count  Dtype
---  ------                                                               --------------  -----
 0   Month                                                                84 non-null     object
 1   Number of passengers                                                 84 non-null     int64
 2   Freight and mail total, tonne                                        84 non-null     int64
 3   Number of passengers, cumulative from the beginning of the year      84 non-null     int64
 4   Freight and mail, cumulative from the beginning of the year, tonnes  84 non-null     int64
dtypes: int64(4), object(1)
memory usage: 3.4+ KB
In [7]:
# Numeric columns used for the analysis (every column except "Month")
columns = [
    "Number of passengers",
    "Freight and mail total, tonne",
    "Number of passengers, cumulative from the beginning of the year",
    "Freight and mail, cumulative from the beginning of the year, tonnes",
]
In [8]:
# Min-max normalisation: rescale every column listed in `columns` to the
# range 0.0-1.0 using that column's own minimum and maximum.
col_min = df[columns].min()
col_range = df[columns].max() - col_min
# A constant column has a zero range, and 0 / 0 would fill it with NaN.
# Dividing by 1 instead maps such a column to 0.0; non-constant columns
# are unaffected.
col_range = col_range.replace(0, 1)
df[columns] = (df[columns] - col_min) / col_range
In [13]:
# Summary statistics of the selected columns, rounded to two decimals
df.loc[:, columns].describe().round(decimals=2)
Out[13]:
| | Number of passengers | Freight and mail total, tonne | Number of passengers, cumulative from the beginning of the year | Freight and mail, cumulative from the beginning of the year, tonnes |
|---|---|---|---|---|
| count | 84.00 | 84.00 | 84.00 | 84.00 |
| mean | 0.56 | 0.51 | 0.32 | 0.40 |
| std | 0.29 | 0.18 | 0.24 | 0.25 |
| min | 0.00 | 0.00 | 0.00 | 0.00 |
| 25% | 0.36 | 0.43 | 0.14 | 0.19 |
| 50% | 0.65 | 0.50 | 0.24 | 0.38 |
| 75% | 0.74 | 0.58 | 0.49 | 0.59 |
| max | 1.00 | 1.00 | 1.00 | 1.00 |
In [14]:
# Pairwise Pearson correlation between the selected columns
df[columns].corr(method="pearson")
Out[14]:
| | Number of passengers | Freight and mail total, tonne | Number of passengers, cumulative from the beginning of the year | Freight and mail, cumulative from the beginning of the year, tonnes |
|---|---|---|---|---|
| Number of passengers | 1.000000 | 0.642697 | 0.592027 | 0.239162 |
| Freight and mail total, tonne | 0.642697 | 1.000000 | 0.426754 | 0.360766 |
| Number of passengers, cumulative from the beginning of the year | 0.592027 | 0.426754 | 1.000000 | 0.835316 |
| Freight and mail, cumulative from the beginning of the year, tonnes | 0.239162 | 0.360766 | 0.835316 | 1.000000 |
In [17]:
# Line chart of the selected columns plotted against the row index
df[columns].plot(kind="line", figsize=(12, 6))
Out[17]:
<Axes: >
In [12]:
# Write the selected columns as a text file for PureData: one row per line,
# row index first, values separated by spaces, no header row, and every
# line terminated by ";".
df[columns].to_csv(
    "passengers",
    sep=" ",
    header=False,
    lineterminator=";\n",
)