-
读取 CSV 文件,并查看数据类型
import pandas

# Load the CSV into a DataFrame; the path is relative to the working directory.
food_info = pandas.read_csv('food_info.csv')

# Show the type of the loaded object and the dtype of each column.
print(type(food_info))
print(food_info.dtypes)

# help() writes the documentation itself and returns None, so wrapping it in
# print() would only append a stray "None" line after the help text.
help(pandas.read_csv)
运行结果:
-
显示前5行
# Preview the top of the table; 5 is the row count head() uses by default.
food_info.head(n=5)
运行结果:
# Preview only the first three rows.
food_info.head(n=3)
运行结果:
-
显示后5行
# Preview the bottom of the table; 5 is the row count tail() uses by default.
food_info.tail(n=5)
运行结果:
# shape is a (row count, column count) tuple; print it unchanged.
table_shape = food_info.shape
print(table_shape)
运行结果:
-
读取特定的数据(用 loc 按行标签取出某一行):
# Look up the row whose index label is 0 and print it.
row_zero = food_info.loc[0]
print(row_zero)
运行结果:
-
切片:
# .loc slices by label and, unlike ordinary Python slicing, includes both
# endpoints, so this selects the rows labelled 3 through 6.
selected_rows = food_info.loc[3:6]
print(selected_rows)
运行结果:
-
列名取数据
# Indexing a DataFrame with a single column label returns that column.
ndb_col = food_info['NDB_No']
print(ndb_col)
运行结果:
-
取某几个列
# Indexing with a list of labels returns a DataFrame holding just those
# columns, in the order listed.
columns = ['Zinc_(mg)', 'Copper_(mg)']
zinc_copper = food_info[columns]
print(zinc_copper)
运行结果:
-
用 endswith() 筛选列名以 "(g)" 结尾的列
# Turn the column index into a plain Python list of labels.
col_names = food_info.columns.tolist()
print(col_names)

# Keep only the labels that end with "(g)".
gram_columns = [name for name in col_names if name.endswith('(g)')]

# Select those columns and preview the first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))
运行结果:
-
对取出的某些列进行数值运算
# Print the original column first so it can be compared with the scaled one.
print(food_info['Iron_(mg)'])

# Arithmetic between a column and a scalar is applied to every element.
div_1000 = food_info['Iron_(mg)'] / 1000
print(div_1000)
运行结果:
-
列和列相乘,并把计算结果作为新列加入 DataFrame
# Multiplying two columns multiplies them element by element.
water_energy = food_info["Water_(g)"] * food_info["Energ_Kcal"]

# Divide the "Iron_(mg)" values by 1000.
iron_grams = food_info["Iron_(mg)"] / 1000

# Assigning a Series to a column label adds that column if it is missing
# (or overwrites it if present); the two shape prints show the effect.
print(food_info.shape)
food_info['Iron_(g)'] = iron_grams
print(food_info.shape)
运行结果:
-
求某一列的最大值,并用该最大值对这一列做归一化
# The "Vit_A_IU" column ranges from 0 to 100000, while the "Fiber_TD_(g)"
# column ranges from 0 to 79. For certain calculations, columns like
# "Vit_A_IU" can have a greater effect on the result due to the scale of the
# values, so each column below is divided by its own largest value.

# The largest value in the "Energ_Kcal" column.
max_calories = food_info["Energ_Kcal"].max()

# Divide the values in "Energ_Kcal" by the largest value.
# NOTE: normalized_calories is computed here but not stored in food_info.
normalized_calories = food_info["Energ_Kcal"] / max_calories

# Same scaling for protein and total fat, each by its own column maximum.
normalized_protein = food_info["Protein_(g)"] / food_info["Protein_(g)"].max()
normalized_fat = food_info["Lipid_Tot_(g)"] / food_info["Lipid_Tot_(g)"].max()

# Store the scaled protein and fat values as columns of the table.
food_info["Normalized_Protein"] = normalized_protein
food_info["Normalized_Fat"] = normalized_fat