The Algorithms logo
The Algorithms
About | Donate

Pandas

import pandas as pd
import numpy as np

def header(msg):
    """Print a section banner: a 50-dash rule, then *msg* in square brackets.

    *msg* must be a string; anything else raises TypeError when the
    bracketed label is assembled (after the rule has been printed).
    """
    separator = "-" * 50
    print(separator)
    # str.join keeps the strict "strings only" behaviour of concatenation.
    print("".join(("[", msg, "]")))
  
# Monthly climate table: one row per calendar month, labelled with an
# explicit integer index 0-11.
df = pd.DataFrame(
    data=[
        ['Jan', 58, 42, 74, 22, 2.95],
        ['Feb', 61, 45, 78, 26, 3.02],
        ['Mar', 65, 48, 84, 25, 2.34],
        ['Apr', 67, 50, 92, 28, 1.02],
        ['May', 71, 53, 98, 35, 0.48],
        ['Jun', 75, 56, 107, 41, 0.11],
        ['Jul', 77, 58, 105, 44, 0.0],
        ['Aug', 77, 59, 102, 43, 0.03],
        ['Sep', 77, 57, 103, 40, 0.17],
        ['Oct', 73, 54, 96, 34, 0.81],
        ['Nov', 64, 48, 84, 30, 1.7],
        ['Dec', 58, 42, 73, 21, 2.56],
    ],
    index=list(range(12)),
    columns=[
        'month',
        'avg_high',
        'avg_low',
        'record_high',
        'record_low',
        'avg_precipitation',
    ],
)

# Show the whole table, then write it to foo.csv (relative path, so it lands
# in the current working directory).
print(df)
df.to_csv('foo.csv')
   month  avg_high  avg_low  record_high  record_low  avg_precipitation
0    Jan        58       42           74          22               2.95
1    Feb        61       45           78          26               3.02
2    Mar        65       48           84          25               2.34
3    Apr        67       50           92          28               1.02
4    May        71       53           98          35               0.48
5    Jun        75       56          107          41               0.11
6    Jul        77       58          105          44               0.00
7    Aug        77       59          102          43               0.03
8    Sep        77       57          103          40               0.17
9    Oct        73       54           96          34               0.81
10   Nov        64       48           84          30               1.70
11   Dec        58       42           73          21               2.56
# Preview the first five rows of the table.
header("2.df.head()")
print(df.head())

# Preview the last five rows. This must call tail(), not head(): the section
# is labelled as the tail preview and would otherwise repeat the rows above.
header("3.df.tail()")
print(df.tail())
--------------------------------------------------
[2.df.head()]
  month  avg_high  avg_low  record_high  record_low  avg_precipitation
0   Jan        58       42           74          22               2.95
1   Feb        61       45           78          26               3.02
2   Mar        65       48           84          25               2.34
3   Apr        67       50           92          28               1.02
4   May        71       53           98          35               0.48
--------------------------------------------------
[3.df.tail()]
   month  avg_high  avg_low  record_high  record_low  avg_precipitation
7    Aug        77       59          102          43               0.03
8    Sep        77       57          103          40               0.17
9    Oct        73       54           96          34               0.81
10   Nov        64       48           84          30               1.70
11   Dec        58       42           73          21               2.56
# Show the data type of each column.
# Note: dtypes is read as an attribute here, despite the "()" in the label.
header("4.df.dtypes()")
print(df.dtypes)

--------------------------------------------------
[4.df.dtypes()]
month                 object
avg_high               int64
avg_low                int64
record_high            int64
record_low             int64
avg_precipitation    float64
dtype: object
# Show the row labels (the explicit 0-11 integer index passed at construction).
# Note: index is read as an attribute here, despite the "()" in the label.
header("5.df.index()")
print(df.index)

--------------------------------------------------
[5.df.index()]
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype='int64')
# Show the column labels.
# Note: columns is read as an attribute here, despite the "()" in the label.
header("6.df.columns()")
print(df.columns)

--------------------------------------------------
[6.df.columns()]
Index(['month', 'avg_high', 'avg_low', 'record_high', 'record_low',
       'avg_precipitation'],
      dtype='object')
# Show the cell values as a 2-D array, one inner row per month.
# Note: values is read as an attribute here, despite the "()" in the label.
header("7.df.values()")
print(df.values)
--------------------------------------------------
[7.df.values()]
[['Jan' 58 42 74 22 2.95]
 ['Feb' 61 45 78 26 3.02]
 ['Mar' 65 48 84 25 2.34]
 ['Apr' 67 50 92 28 1.02]
 ['May' 71 53 98 35 0.48]
 ['Jun' 75 56 107 41 0.11]
 ['Jul' 77 58 105 44 0.0]
 ['Aug' 77 59 102 43 0.03]
 ['Sep' 77 57 103 40 0.17]
 ['Oct' 73 54 96 34 0.81]
 ['Nov' 64 48 84 30 1.7]
 ['Dec' 58 42 73 21 2.56]]
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column "month" is not included in the result.
# NOTE(review): no print() here - presumably relies on the notebook echoing
# the cell's last expression; as a plain script this line shows nothing.
header("8.df.describe()")
df.describe()
--------------------------------------------------
[8.df.describe()]
avg_high avg_low record_high record_low avg_precipitation
count 12.000000 12.000000 12.000000 12.000000 12.000000
mean 68.583333 51.000000 91.333333 32.416667 1.265833
std 7.366488 6.060303 12.323911 8.240238 1.186396
min 58.000000 42.000000 73.000000 21.000000 0.000000
25% 63.250000 47.250000 82.500000 25.750000 0.155000
50% 69.000000 51.500000 94.000000 32.000000 0.915000
75% 75.500000 56.250000 102.250000 40.250000 2.395000
max 77.000000 59.000000 107.000000 44.000000 3.020000
# Sort the rows by record_high, highest value first.
# ascending must be the boolean False: the string "False" is truthy, so older
# pandas sorted ascending with it, and newer pandas rejects non-bool values.
# The result is a new DataFrame; df itself is left unsorted.
header("8.df.sort_values()")
df.sort_values("record_high", ascending=False)
--------------------------------------------------
[8.df.sort_values()]
month avg_high avg_low record_high record_low avg_precipitation
5 Jun 75 56 107 41 0.11
6 Jul 77 58 105 44 0.00
8 Sep 77 57 103 40 0.17
7 Aug 77 59 102 43 0.03
4 May 71 53 98 35 0.48
9 Oct 73 54 96 34 0.81
3 Apr 67 50 92 28 1.02
2 Mar 65 48 84 25 2.34
10 Nov 64 48 84 30 1.70
1 Feb 61 45 78 26 3.02
0 Jan 58 42 74 22 2.95
11 Dec 58 42 73 21 2.56
# Attribute-style column access: yields the avg_low column as a Series
# (same result as df['avg_low']).
df.avg_low
0     42
1     45
2     48
3     50
4     53
5     56
6     58
7     59
8     57
9     54
10    48
11    42
Name: avg_low, dtype: int64
# Bracket-style column access by name: yields the avg_low column as a Series.
df['avg_low']
0     42
1     45
2     48
3     50
4     53
5     56
6     58
7     59
8     57
9     54
10    48
11    42
Name: avg_low, dtype: int64
# An integer slice on the frame selects rows; the stop is exclusive, so this
# returns rows 2 and 3.
df[2:4]
month avg_high avg_low record_high record_low avg_precipitation
2 Mar 65 48 84 25 2.34
3 Apr 67 50 92 28 1.02
# A list of column names selects those columns, in the order listed.
df[['avg_low','avg_high','record_high']]
avg_low avg_high record_high
0 42 58 74
1 45 61 78
2 48 65 84
3 50 67 92
4 53 71 98
5 56 75 107
6 58 77 105
7 59 77 102
8 57 77 103
9 54 73 96
10 48 64 84
11 42 58 73
# Position-based selection: row positions 5-7 (stop is exclusive) and the
# columns at positions 0 and 3 (month, record_high).
df.iloc[5:8 , [0,3]]  # positions are used like array indices, not labels
month record_high
5 Jun 107
6 Jul 105
7 Aug 102
# Label-based selection: unlike iloc, a loc slice includes its end label, so
# this returns rows labelled 5 through 8 for the three named columns.
df.loc[5:8 , ['avg_low','avg_high','record_high']]
avg_low avg_high record_high
5 56 75 107
6 58 77 105
7 59 77 102
8 57 77 103
# A single row label with a list of columns yields a Series named after the
# row label (9), holding one value per requested column.
df.loc[9, ['avg_low','avg_high','record_high']]
avg_low        54
avg_high       73
record_high    96
Name: 9, dtype: object
# Position-based selection again: row positions 3 and 4 (stop is exclusive),
# columns at positions 0 and 3 (month, record_high).
df.iloc[3:5,[0,3]]
month record_high
3 Apr 92
4 May 98