{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notebook outline:\n",
    "# 1. Load the data\n",
    "# 2. Merge the tables\n",
    "# 3. Find the relationship between user_id and aisle\n",
    "# 4. Reduce dimensionality with PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Load the data\n",
    "order_products = pd.read_csv(\"./instacart/order_products__prior.csv\")\n",
    "products = pd.read_csv(\"./instacart/products.csv\")\n",
    "orders = pd.read_csv(\"./instacart/orders.csv\")\n",
    "aisles = pd.read_csv(\"./instacart/aisles.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Merge the tables\n",
    "# order_products__prior.csv: order-to-product information\n",
    "#   fields: order_id, product_id, add_to_cart_order, reordered\n",
    "# products.csv: product information\n",
    "#   fields: product_id, product_name, aisle_id, department_id\n",
    "# orders.csv: users' order information\n",
    "#   fields: order_id, user_id, eval_set, order_number, ...\n",
    "# aisles.csv: the specific category (aisle) each product belongs to\n",
    "#   fields: aisle_id, aisle\n",
    "\n",
    "# Merge aisles with products so that aisle and product_id share one table.\n",
    "# Join on the single shared key; listing the same column twice in `on` is redundant.\n",
    "tab1 = pd.merge(aisles, products, on=\"aisle_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the order lines to each product via the shared product_id key.\n",
    "tab2 = pd.merge(tab1, order_products, on=\"product_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the order-level columns (including user_id) via the shared order_id key.\n",
    "tab3 = pd.merge(tab2, orders, on=\"order_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
| \n", " | aisle_id | \n", "aisle | \n", "product_id | \n", "product_name | \n", "department_id | \n", "order_id | \n", "add_to_cart_order | \n", "reordered | \n", "user_id | \n", "eval_set | \n", "order_number | \n", "order_dow | \n", "order_hour_of_day | \n", "days_since_prior_order | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "prepared soups salads | \n", "209 | \n", "Italian Pasta Salad | \n", "20 | \n", "94246 | \n", "5 | \n", "0 | \n", "114082 | \n", "prior | \n", "26 | \n", "0 | \n", "20 | \n", "1.0 | \n", "
| 1 | \n", "1 | \n", "prepared soups salads | \n", "22853 | \n", "Pesto Pasta Salad | \n", "20 | \n", "94246 | \n", "4 | \n", "0 | \n", "114082 | \n", "prior | \n", "26 | \n", "0 | \n", "20 | \n", "1.0 | \n", "
| 2 | \n", "4 | \n", "instant foods | \n", "12087 | \n", "Chicken Flavor Ramen Noodle Soup | \n", "9 | \n", "94246 | \n", "15 | \n", "0 | \n", "114082 | \n", "prior | \n", "26 | \n", "0 | \n", "20 | \n", "1.0 | \n", "
| 3 | \n", "4 | \n", "instant foods | \n", "47570 | \n", "Original Flavor Macaroni & Cheese Dinner | \n", "9 | \n", "94246 | \n", "14 | \n", "1 | \n", "114082 | \n", "prior | \n", "26 | \n", "0 | \n", "20 | \n", "1.0 | \n", "
| 4 | \n", "13 | \n", "prepared meals | \n", "10089 | \n", "Dolmas | \n", "20 | \n", "94246 | \n", "25 | \n", "0 | \n", "114082 | \n", "prior | \n", "26 | \n", "0 | \n", "20 | \n", "1.0 | \n", "