- How to create a pandas DataFrame
- How to do data manipulation with pandas
- If we pass a Python dictionary as the `data` argument of the pandas `DataFrame` constructor, we can create a pandas DataFrame
import pandas as pd
import numpy as np

# Sample data: one row per person. Each dictionary key becomes a column and
# the rows receive the default integer index 0..5.
df = pd.DataFrame(
    data={
        'name': ['john', 'mary', 'peter', 'jeff', 'bill', 'lisa'],
        'age': [23, 78, 22, 19, 45, 33],
        'state': ['iowa', 'dc', 'california', 'texas', 'washington', 'dc'],
        'num_children': [2, 2, 0, 1, 2, 1],
        'num_pets': [0, 4, 0, 5, 0, 0],
    }
)

# ---------------------------------------------------------------------------
# Selecting columns and rows
# ---------------------------------------------------------------------------
# column labels of the frame
df.columns
# select several columns by name (a list of labels yields a DataFrame)
df[['name', 'age', 'state']]
# label-based slices with .loc include BOTH endpoints: rows 2, 3 and 4
df.loc[2:4, 'name']
df.loc[2:4, ['name', 'age']]
# select the first 2 rows
df.iloc[:2]
# select the last 2 rows
df.iloc[-2:]
# select rows up to and including the one
# with index=2 (this retrieves 3 rows)
df.loc[:2]
# first column of data frame with index
df.iloc[:, 0]
# select columns by name
df.loc[:, ['age', 'state']]
# equivalent shorthand: df[['age', 'state']]
# second row of data-frame
df.iloc[1]

# ---------------------------------------------------------------------------
# Filtering rows with boolean masks
# ---------------------------------------------------------------------------
# people whose "age" is greater than 30
df[df['age'] > 30]
# same filter using attribute access to the column
df[df.age > 30]
# people who have more pets than children
df[df["num_pets"] > df["num_children"]]
# people older than 40 who own pets (both conditions must hold)
df[(df["age"] > 40) & (df["num_pets"] > 0)]
# people older than 40 OR who own pets (either condition is enough)
df[(df["age"] > 40) | (df["num_pets"] > 0)]

# ---------------------------------------------------------------------------
# Dropping columns and summarising
# ---------------------------------------------------------------------------
# df itself is not modified; a copy is returned instead
df.drop(columns=["age", "num_children"])
df
# summary statistics of the numeric columns
df.describe()

# ---------------------------------------------------------------------------
# Aggregating with built-in reductions and with apply()
# ---------------------------------------------------------------------------
# Apply an aggregate function to every column
df[["age", "num_pets", "num_children"]].mean()
# the same column-wise mean, spelled out through apply (axis=0 -> per column)
df[["age", "num_pets", "num_children"]].apply(lambda col: np.mean(col), axis=0)
# column-wise totals
df[["age", "num_pets", "num_children"]].apply(lambda col: np.sum(col), axis=0)
# total of a single column
df['age'].sum()
# axis=1 feeds each ROW to the function: one total per person
df[["age", "num_pets", "num_children"]].apply(lambda row: np.sum(row), axis=1)

# double brackets -> one-column DataFrame; single brackets -> Series
df[['age']]
df['age']

# Element-wise arithmetic is vectorized, so no apply()/lambda is needed.
# This expression only builds a new frame; df is left untouched ...
df[["age"]] * 2
# ... whereas assigning the result back overwrites the column in df.
df['age'] = df['age'] * 2

# ---------------------------------------------------------------------------
# Sorting and string-based filtering
# ---------------------------------------------------------------------------
# Sort DataFrame by column value (returns a sorted copy)
df.sort_values("age", ascending=True)
# select rows whose name begins with the letter 'j'
# (.str applies the string method to the whole column at once instead of
# calling a Python lambda once per row)
df[df['name'].str.startswith('j')]
from collections import OrderedDict
from pandas import DataFrame
import pandas as pd
import numpy as np
# Build the sample price table one column at a time; the insertion order of
# the keys determines the column order of the resulting DataFrame.
table = OrderedDict()
table["Item"] = ['Item0', 'Item0', 'Item1', 'Item1']
table['CType'] = ['Gold', 'Bronze', 'Gold', 'Silver']
table['USD'] = ['1$', '2$', '3$', '4$']
table['EU'] = ['1€', '2€', '3€', '4€']

# long-format frame: one row per (Item, CType) combination
d = DataFrame(table)

# Reshape long -> wide: one row per Item, one column per CType, each cell
# holding the USD price. Item/CType pairs missing from the data become NaN.
p = d.pivot(values='USD', columns='CType', index='Item')