用pandas实现SQL功能
SQL | pandas |
---|---|
select * from table | df_table |
select * from table limit 3 | df_table.head(3) |
select * from table where id = 2 | df_table[df_table[“id”] == 2] |
select * from table where id = 2 and name = “ice” | df_table[(df_table[“id”] == 2) & (df_table[“name”] == “ice”)] |
select name from table where id = 2 | df_table[df_table[“id”] == 2][[“name”]] |
select distinct name from table | df_table[“name”].unique() |
select count(distinct name) from table | df_table[“name”].nunique() |
select * from table where id = 2 order by inserttime | df_table[df_table[“id”] == 2].sort_values(by=”inserttime”) |
select * from table where id = 2 order by inserttime desc | df_table[df_table[“id”] == 2].sort_values(by=”inserttime”, ascending=False) |
select * from table where id in (1, 2, 3) | df_table[df_table[“id”].isin([1, 2, 3])] |
select * from table where id not in (1, 2, 3) | df_table[~df_table[“id”].isin([1, 2, 3])] |
select * from table group by name | df_table.groupby(by=”name”) |
select * from table group by name, age | df_table.groupby(by=[“name”, “age”]) |
select * from table group by name, age order by inserttime desc | df_table.groupby(by=[“name”, “age”]).reset_index().sort_values(by=”inserttime”, ascending=False) |
select * from table where id = 2 group by name having count()>1000 order by count() desc | df_table[df_table[“id”] == 2].groupby(“name”).filter(lambda g: len(g)>1000).reset_index().sort_values(ascending=False) |
select * from table order by inserttime desc limit 3 | df_table.nlargest(3, “inserttime”) |
select * from table order by inserttime desc limit 3 offset 5 | df_table.nlargest(8, “inserttime”).tail(3) |
select max(age),min(age), mean(age), median(age) from table | df_table.agg({“age”: [“max”, “min”, “mean”, “median”]}) |
select id, name, age from table1 join table2 on table1.name=table2.name where table2.age>18 | df_table1.merge(df_table2[df_table2[“age”]>18][[“name”]], on=”name”, how=”inner”)[[“id”, “name”, “age”]] |
update table set age=28 where name=”lihua” | df_table.loc[df_table[“name”] == “lihua”, “age”] = 28 |
delete from table where name = “kk” | df_table.drop(df_table[df_table[“name”] == “kk”]) |
本作品采用《CC 协议》,转载必须注明作者和本文链接