| | type | size | ratio |
|---|---|---|---|
1 | A | 3 | 0.2 |
2 | B | 5 | 0.4 |
3 | C | NaN | 0.8 |
Pandas Data Structures
>>> import pandas as pd
>>> df = pd.DataFrame({'type': ['A', 'B', 'C'], 'size': [3, 5, None], 'ratio': [0.2, 0.4, 0.8]})
>>> df
type size ratio
0 A 3.0 0.2
1 B 5.0 0.4
2 C NaN 0.8
>>> df = pd.read_csv('file.csv', header=None, nrows=5)
>>> df.to_csv('myDataFrame.csv')
df['type'] # Select all values from type column
df[['type', 'size']] # Select all values from type and size columns
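df.iloc[:2, 0] # Select by position: first two rows of the first column (end excluded)
df.loc[:2, 'type'] # Select by label: rows labelled 0 through 2 of the type column (end included)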
df[df['ratio'] > 0.5]
df[(df['ratio'] > 0.5) & (df['type'] == 'A')] # Combine conditions with & (and), | (or), ~ (not); parenthesize each condition
df['ratio_x_100'] = df['ratio'] * 100
df.drop(columns=['type'])
df.drop('type', axis=1)
df.dropna()
df.sort_index()
df.sort_values(by='ratio')
df['size']
df.shape
df.index
df.columns
df.dtypes
df.describe()
df.count()
df.sum()
df.cumsum()
df.min()
df.max()
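df.mean(numeric_only=True) # numeric_only=True skips the non-numeric type column
df.median(numeric_only=True)
df.std(numeric_only=True)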
df[['size', 'ratio']].apply(lambda x: x + 2, axis=0) # Apply to numeric columns only; adding 2 to the string column would fail
ss = df['size'] * df['ratio']
df['size'].fillna(0)
df['size'].fillna(df['size'].mean()) # fillna requires a value; here missing entries are replaced by the column mean
df['size'].plot()
df.plot.hist(bins=10)