{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Dealing with missing data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pandas import Series, DataFrame\n", "import pandas as pd\n", "from numpy.random import randn\n", "import numpy as np\n", "from collections import Counter\n", "import time\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(478138, 27) (61259, 26)\n", "['instance_id', 'item_id', 'item_category_list', 'item_property_list', 'item_brand_id', 'item_city_id', 'item_price_level', 'item_sales_level', 'item_collected_level', 'item_pv_level', 'user_id', 'user_gender_id', 'user_age_level', 'user_occupation_id', 'user_star_level', 'context_id', 'context_timestamp', 'context_page_id', 'predict_category_property', 'shop_id', 'shop_review_num_level', 'shop_review_positive_rate', 'shop_star_level', 'shop_score_service', 'shop_score_delivery', 'shop_score_description', 'is_trade']\n" ] } ], "source": [ "train = pd.read_table('round1_train.txt',sep = ' ')\n", "test_a = pd.read_table('round1_testa.txt',sep = ' ')\n", "test_b = pd.read_table('round1_testb.txt',sep = ' ')\n", "test = pd.concat([test_a,test_b]) # combine a,b test set\n", "print(train.shape,test.shape)\n", "print(list(train.columns))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# drop duplicate instances in train and test\n", "train.drop_duplicates(inplace = True)\n", "test.drop_duplicates(inplace = True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "positive instances num: 8994\n", "negative instances num: 469117\n" ] } ], "source": [ "print('positive instances num: ',sum(train['is_trade']))\n", "print('negative instances num: ', 
# Fill a missing id with the most common value, or sample from the two most
# common values when neither clearly dominates.
def filling_id(df, feature, missing_num=None, ratio_threshold=5,
               missing_value=-1, rng=None):
    """Impute missing ids in ``df[feature]`` in place and return ``df``.

    Rows equal to ``missing_value`` are treated as missing. The two most
    frequent *observed* values (the sentinel itself is excluded) are looked up:

    * if the most frequent value is less than ``ratio_threshold`` times as
      frequent as the runner-up, every missing row is drawn at random from
      those two values, weighted by their observed frequencies;
    * otherwise (or when only one distinct value was observed) every missing
      row receives the single most frequent value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    feature : str
        Name of the column to impute.
    missing_num : int, optional
        Kept only for backward compatibility with existing calls. The number
        of rows to fill is always recomputed from the data, so a stale
        hand-typed count can no longer cause a length mismatch.
    ratio_threshold : float, default 5
        Dominance ratio above which the top value alone is used. The default
        is a heuristic choice, not a tuned value.
    missing_value : scalar, default -1
        Sentinel that marks a missing entry.
    rng : object with a ``choice(a, size=..., p=...)`` method, optional
        Random source; defaults to the global ``numpy.random`` state, so call
        ``np.random.seed`` beforehand for reproducible output.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    missing_mask = df[feature] == missing_value
    n_missing = int(missing_mask.sum())
    if n_missing == 0:
        return df

    # Count observed values only, so the sentinel can never be picked as a
    # replacement for itself.
    counts = df.loc[~missing_mask, feature].value_counts().head(2)
    if counts.empty:
        # Nothing observed to impute from; leave the column untouched.
        return df

    if len(counts) == 2 and counts.iloc[0] / counts.iloc[1] < ratio_threshold:
        rng = np.random if rng is None else rng
        probabilities = (counts / counts.sum()).values
        # `p` must be passed by keyword: the third positional parameter of
        # `choice` is `replace`, not `p`.
        df.loc[missing_mask, feature] = rng.choice(
            counts.index.values, size=n_missing, p=probabilities
        )
    else:
        df.loc[missing_mask, feature] = counts.index[0]
    return df
# Fill a missing level by sampling from the most common observed levels.
def filling_level(df, feature, missing_num=None, top_k=6,
                  missing_value=-1, rng=None):
    """Impute missing levels in ``df[feature]`` in place and return ``df``.

    Rows equal to ``missing_value`` are replaced by values drawn at random
    from the ``top_k`` most frequent *observed* levels, weighted by their
    observed frequencies. The sentinel itself is excluded from the candidates.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    feature : str
        Name of the column to impute.
    missing_num : int, optional
        Kept only for backward compatibility with existing calls. The number
        of rows to fill is always recomputed from the data.
    top_k : int, default 6
        How many of the most frequent levels to sample from. The default is a
        heuristic choice, not a tuned value.
    missing_value : scalar, default -1
        Sentinel that marks a missing entry.
    rng : object with a ``choice(a, size=..., p=...)`` method, optional
        Random source; defaults to the global ``numpy.random`` state, so call
        ``np.random.seed`` beforehand for reproducible output.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    missing_mask = df[feature] == missing_value
    n_missing = int(missing_mask.sum())
    if n_missing == 0:
        return df

    # Only observed values are candidates; otherwise a frequent sentinel would
    # be sampled back into the rows it is supposed to replace.
    counts = df.loc[~missing_mask, feature].value_counts().head(top_k)
    if counts.empty:
        # Nothing observed to impute from; leave the column untouched.
        return df

    rng = np.random if rng is None else rng
    probabilities = (counts / counts.sum()).values
    # `p` must be passed by keyword: the third positional parameter of
    # `choice` is `replace`, not `p`.
    df.loc[missing_mask, feature] = rng.choice(
        counts.index.values, size=n_missing, p=probabilities
    )
    return df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
instance_iditem_iditem_category_listitem_property_listitem_brand_iditem_city_iditem_price_levelitem_sales_levelitem_collected_levelitem_pv_level...context_page_idpredict_category_propertyshop_idshop_review_num_levelshop_review_positive_rateshop_star_levelshop_score_serviceshop_score_deliveryshop_score_descriptionis_trade
010864107471412696434127203770986760697908382889764677758;57993470679825565202072967855524022579;5131280576272319091;263639...1975590437749032870394828332661642100333414...40065799347067982556520:-1;509660095530134768:-1;5...676593030904892234141.050021.01.01.00
1575471355159972516134127203770986760697908382889764677758;57993470679825565202072967855524022579;5131280576272319091;263639...1975590437749032870394828332661642100333414...40015799347067982556520:9172976955054793469;790838...676593030904892234141.050021.01.01.00
284267948129104098134127203770986760697908382889764677758;57993470679825565202072967855524022579;5131280576272319091;263639...1975590437749032870394828332661642100333414...40015799347067982556520:5131280576272319091;725801...676593030904892234141.050021.01.01.00
393708885005918902734127203770986760697908382889764677758;57993470679825565202072967855524022579;5131280576272319091;263639...1975590437749032870394828332661642100333414...4016509660095530134768:-1;5799347067982556520:-1;7...676593030904892234141.050021.01.01.00
4797569706501770807234127203770986760697908382889764677758;57993470679825565202072967855524022579;5131280576272319091;263639...1975590437749032870394828332661642100333414...40015799347067982556520:9172976955054793469;790838...676593030904892234141.050021.01.01.00
\n", "

5 rows × 27 columns

\n", "
" ], "text/plain": [ " instance_id item_id \\\n", "0 108641074714126964 3412720377098676069 \n", "1 5754713551599725161 3412720377098676069 \n", "2 842679481291040981 3412720377098676069 \n", "3 937088850059189027 3412720377098676069 \n", "4 7975697065017708072 3412720377098676069 \n", "\n", " item_category_list \\\n", "0 7908382889764677758;5799347067982556520 \n", "1 7908382889764677758;5799347067982556520 \n", "2 7908382889764677758;5799347067982556520 \n", "3 7908382889764677758;5799347067982556520 \n", "4 7908382889764677758;5799347067982556520 \n", "\n", " item_property_list item_brand_id \\\n", "0 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "1 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "2 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "3 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "4 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "\n", " item_city_id item_price_level item_sales_level \\\n", "0 3948283326616421003 3 3 \n", "1 3948283326616421003 3 3 \n", "2 3948283326616421003 3 3 \n", "3 3948283326616421003 3 3 \n", "4 3948283326616421003 3 3 \n", "\n", " item_collected_level item_pv_level ... context_page_id \\\n", "0 4 14 ... 4006 \n", "1 4 14 ... 4001 \n", "2 4 14 ... 4001 \n", "3 4 14 ... 4016 \n", "4 4 14 ... 4001 \n", "\n", " predict_category_property shop_id \\\n", "0 5799347067982556520:-1;509660095530134768:-1;5... 6765930309048922341 \n", "1 5799347067982556520:9172976955054793469;790838... 6765930309048922341 \n", "2 5799347067982556520:5131280576272319091;725801... 6765930309048922341 \n", "3 509660095530134768:-1;5799347067982556520:-1;7... 6765930309048922341 \n", "4 5799347067982556520:9172976955054793469;790838... 
# Convert a Unix timestamp to a formatted local date-time string.
def timestamp_datetime(stamp):
    """Format a Unix timestamp as ``'YYYY-MM-DD HH:MM:SS'`` in local time.

    Parameters
    ----------
    stamp : int or float
        Seconds since the epoch, as accepted by ``time.localtime``.

    Returns
    -------
    str
        Fixed-width string; the slicing in ``convert_date_time`` relies on
        this exact layout.

    Notes
    -----
    The conversion uses the machine's local timezone, so derived day and hour
    values depend on where the notebook is executed.
    """
    struct_time = time.localtime(stamp)
    # `%m` is the month; `%M` is minutes and must only appear in the time part.
    return time.strftime('%Y-%m-%d %H:%M:%S', struct_time)


def convert_date_time(df):
    """Add ``datetime``, ``day``, ``hour`` and ``time`` columns to ``df``.

    The frame is modified in place and also returned. It must contain a
    ``context_timestamp`` column of Unix timestamps in seconds.

    Added columns
    -------------
    datetime : str
        Output of ``timestamp_datetime``.
    day : int
        Day of the month, taken from characters 8-9 of ``datetime``.
    hour : int
        Hour of the day (0-23), taken from characters 11-12 of ``datetime``.
    time : str
        The ``HH:MM:SS`` tail of ``datetime``.
    """
    df['datetime'] = df['context_timestamp'].apply(timestamp_datetime)
    df['day'] = df['datetime'].apply(lambda text: int(text[8:10]))
    df['hour'] = df['datetime'].apply(lambda text: int(text[11:13]))
    df['time'] = df['datetime'].apply(lambda text: text[11:])
    return df
# Convert hour into midnight (0-7), morning (8-13), afternoon (14-19), night (20-23).
def convert_to_timeslot(data):
    """Add four 0/1 time-of-day indicator columns derived from ``data['hour']``.

    The frame is modified in place and also returned.

    Added columns
    -------------
    is_midnight : int
        1 when ``hour <= 7``.
    is_morning : int
        1 when ``7 < hour <= 13``.
    is_afternoon : int
        1 when ``13 < hour <= 19``.
    is_night : int
        1 when ``hour > 19``.

    Raises
    ------
    KeyError
        If ``data`` has no ``hour`` column. The lookup happens before any
        column is written, so a failed call leaves ``data`` unchanged.
    """
    hour = data['hour']
    slot_masks = (
        ('is_midnight', hour <= 7),
        ('is_morning', (hour > 7) & (hour <= 13)),
        ('is_afternoon', (hour > 13) & (hour <= 19)),
        ('is_night', hour > 19),
    )
    for column, mask in slot_masks:
        data[column] = mask.astype('int')
    return data
# item_category_list & item_property_list & predict_category_property
def extract_pred_cate_props(data):
    """Split ``predict_category_property`` into per-row category and property lists.

    Each cell is expected to look like ``"cateA:prop1,prop2;cateB:prop3"``.
    For every row the distinct categories and the distinct properties are
    collected in first-seen order.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column ``predict_category_property``.

    Returns
    -------
    (list, list)
        ``pred_cate_lists`` and ``pred_prop_lists``, one entry per row. An
        entry is a list of strings, or the string ``'-1'`` when the row has no
        prediction (the cell is exactly ``'-1'`` or is not a string, e.g. NaN).

    Notes
    -----
    A ``'-1'`` placeholder inside an otherwise valid cell (``"cate:-1"``) is
    kept as a literal property, and an entry without ``':'`` contributes its
    category only.
    NOTE(review): keeping the ``'-1'`` placeholder lowers property precision
    for such rows - confirm whether it should be dropped instead.
    """
    pred_cate_lists = []
    pred_prop_lists = []
    for pred_cate_props in data['predict_category_property'].str.split(';'):
        # `.str.split` yields a non-list (NaN/None) for missing cells.
        if not isinstance(pred_cate_props, list) or pred_cate_props == ['-1']:
            pred_cate_lists.append('-1')
            pred_prop_lists.append('-1')
            continue

        pred_cate_list, pred_prop_list = [], []
        seen_cates, seen_props = set(), set()
        for pred_cate_prop in pred_cate_props:
            parts = pred_cate_prop.split(':')
            cate = parts[0]
            if cate not in seen_cates:
                seen_cates.add(cate)
                pred_cate_list.append(cate)

            if len(parts) < 2:
                # No ':' separator, so there are no properties to read.
                continue
            for prop in parts[1].split(','):
                if prop not in seen_props:
                    seen_props.add(prop)
                    pred_prop_list.append(prop)

        pred_cate_lists.append(pred_cate_list)
        pred_prop_lists.append(pred_prop_list)
    return pred_cate_lists, pred_prop_lists


def _precision_recall(predicted, actual):
    """Return ``(precision, recall)`` of ``predicted`` items against ``actual``.

    Anything that is not a list (the ``'-1'`` sentinel, NaN) counts as empty,
    and an empty side yields 0.0 for its ratio instead of dividing by zero.
    """
    predicted = predicted if isinstance(predicted, list) else []
    actual = actual if isinstance(actual, list) else []
    actual_set = set(actual)
    hits = sum(1 for item in predicted if item in actual_set)
    precision = hits / len(predicted) if predicted else 0.0
    recall = hits / len(actual) if actual else 0.0
    return precision, recall


def calc_cate_prop_precision_recall(data):
    """Score predicted categories/properties against each item's real ones.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the string columns ``item_category_list`` and
        ``item_property_list`` (``';'``-separated) and
        ``predict_category_property``.

    Returns
    -------
    tuple of four lists of float
        ``cate_precision_list, cate_recall_list, prop_precision_list,
        prop_recall_list``, one value per row. Precision is hits divided by
        the number of distinct predicted items; recall is hits divided by the
        number of real items. Rows with no prediction score 0.0 on both.
    """
    cate_lists = data['item_category_list'].str.split(';')
    prop_lists = data['item_property_list'].str.split(';')
    pred_cate_lists, pred_prop_lists = extract_pred_cate_props(data)

    cate_scores = [_precision_recall(pred, real)
                   for pred, real in zip(pred_cate_lists, cate_lists)]
    prop_scores = [_precision_recall(pred, real)
                   for pred, real in zip(pred_prop_lists, prop_lists)]

    cate_precision_list = [precision for precision, _ in cate_scores]
    cate_recall_list = [recall for _, recall in cate_scores]
    prop_precision_list = [precision for precision, _ in prop_scores]
    prop_recall_list = [recall for _, recall in prop_scores]
    return cate_precision_list, cate_recall_list, prop_precision_list, prop_recall_list
"markdown", "metadata": {}, "source": [ "### 4. Dealing with continuous feature \n", "For some of continuous features, if they do not show clear linear relation with label, we may need to divide them into different section to find out the clear correlation.\n", "\n", "`['item_price_level'], ['item_sales_level'], ['item_collected_level'], ['item_pv_level']` \n", "`['user_age_level'], ['user_star_level']` \n", "`['shop_review_num_level'], ['shop_review_positive_rate'], ['shop_star_level'], ['shop_score_service'], ['shop_score_delivery'], ['shop_score_description']`" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAELCAYAAADKjLEqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl4VeW5/vHvkzlhJoQxzJMMokhEnFFRqbUOrRZta7VyDrVW29rhHD3tqa1tT221/rSntnW26qlWbbVoVRxxQjRBBJlUCFNkCBCGkISMz++PvRI3IcMK2Zsk5P5c17723mt41ptA9r3X8L7L3B0REZHmJLR1A0REpGNQYIiISCgKDBERCUWBISIioSgwREQkFAWGiIiEosAQEZFQFBgiIhKKAkNEREJJausGxFKfPn182LBhbd0MEZEOZdGiRdvdPau55Q6rwBg2bBh5eXlt3QwRkQ7FzNaHWU6HpEREJBQFhoiIhKLAEBGRUBQYIiISigJDRERCiXtgmNlMM/vIzFab2fUNzP++ma0ws6Vm9oqZDY2aV21mHwSPufFuq4iINC6ul9WaWSJwJ3AmUADkmtlcd18RtdhiIMfdS83sW8BvgVnBvDJ3PzqebRQRkXDi3Q9jKrDa3fMBzOwx4HygLjDc/bWo5RcCX4tzmwC47L53KdhZRnavdB6efdyh2KSISIcW70NSg4CNUe8LgmmNmQ08H/U+zczyzGyhmV3Q0ApmNidYJm/btm2hG1aws4y120so2FkWeh0Rkc4s3nsY1sA0b3BBs68BOcCpUZOHuPsmMxsBvGpmH7r7mv2Kud8N3A2Qk5PTYG0REWm9eO9hFACDo95nA5vqL2RmM4AfA+e5e3ntdHffFDznA/OByfFsrIiINC7egZELjDaz4WaWAlwC7He1k5lNBu4iEhaFUdN7mVlq8LoPcCJR5z5EROTQiushKXevMrNrgHlAInC/uy83s5uAPHefC9wCdAWeMDOADe5+HjAOuMvMaogE2831rq4SEZFDKO6j1br7c8Bz9ab9NOr1jEbWWwAcGd/WiYhIWOrpLSIioSgwREQkFAWGiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQkFAWGiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQkF
AWGiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQkFAWGiIiEosAQEZFQFBgiIhJK3APDzGaa2UdmttrMrm9g/vfNbIWZLTWzV8xsaNS8y83sk+BxebzbKiIijYtrYJhZInAn8DlgPHCpmY2vt9hiIMfdJwFPAr8N1u0N3AgcB0wFbjSzXvFsr4iINC7eexhTgdXunu/uFcBjwPnRC7j7a+5eGrxdCGQHr88GXnL3InffCbwEzIxze0VEpBHxDoxBwMao9wXBtMbMBp4/yHVFRCSOkuJc3xqY5g0uaPY1IAc4tSXrmtkcYA7AkCFDDq6VIiLSrHjvYRQAg6PeZwOb6i9kZjOAHwPnuXt5S9Z197vdPcfdc7KysmLWcBER2V+8AyMXGG1mw80sBbgEmBu9gJlNBu4iEhaFUbPmAWeZWa/gZPdZwTQREWkDcT0k5e5VZnYNkQ/6ROB+d19uZjcBee4+F7gF6Ao8YWYAG9z9PHcvMrNfEAkdgJvcvSie7RURkcbF+xwG7v4c8Fy9aT+Nej2jiXXvB+6PX+tERCQs9fQWEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQkFAWGiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQkFAWGiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQkFAWGiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIiocQ9MMxsppl9ZGarzez6BuafYmbvm1mVmV1Ub161mX0QPObGu60iItK4pHgWN7NE4E7gTKAAyDWzue6+ImqxDcAVwA8bKFHm7kfHs40iIhJOXAMDmAqsdvd8ADN7DDgfqAsMd18XzKuJc1tERKQV4n1IahCwMep9QTAtrDQzyzOzhWZ2QWybJiIiLRHvPQxrYJq3YP0h7r7JzEYAr5rZh+6+Zr8NmM0B5gAMGTLk4FsqIiJNivceRgEwOOp9NrAp7Mruvil4zgfmA5MbWOZud89x95ysrKzWtVZERBoV78DIBUab2XAzSwEuAUJd7WRmvcwsNXjdBziRqHMfIiJyaLUoMMws3czGhl3e3auAa4B5wErgcXdfbmY3mdl5Qc1jzawAuBi4y8yWB6uPA/LMbAnwGnBzvaurRETkEAp9DsPMvgDcCqQAw83saOAmdz+vqfXc/TnguXrTfhr1OpfIoar66y0AjgzbPhERia+W7GH8jMhlsrsA3P0DYFjsmyQiIu1RSwKjyt13x60lIiLSrrXkstplZvYVINHMRgPfARbEp1kiItLetGQP41pgAlAOPArsAb4Xj0aJiEj7E3oPw91LgR8HDxER6WSaDQwze4Ymemc3d5WUiIgcHsLsYdwaPH8R6A88Ery/FFgXhzaJiEg71GxguPvrAGb2C3c/JWrWM2b2RtxaJiIi7UpLTnpnBYMAAmBmwwEN3iQi0km05LLa64D5ZpYfvB8GfDPmLRIRkXapJVdJvRD0vzgimLTK3cvj0ywREWlvWno/jNHAWCANOMrMcPeHYt8sERFpb1oy+OCNwHRgPJHBBD8HvAUoMEREOoGWnPS+CDgD2OLu3wCOAlLj0ioREWl3WhIYZe5eA1SZWXegEBjRzDoiInKYaMk5jDwz6wncAywC9gLvxaVVIiLS7oQKDDMz4Nfuvgv4s5m9AHR396VxbZ2IiLQboQ5JubsDT0e9X6ewEBHpXFpyDmOhmR0bt5aIiEi71pJzGKcB3zSz9UAJYER2PibFpWUiItKutCQwPhe3VoiISLvXkkNSv3T39dEP4JfxapiIiLQvLQmMCdFvzCwRmBLb5oiISHvVbGCY2Q1mVgxMMrM9waOYSMe9f8a9hSIi0i40Gxju/mt37wbc4u7dg0c3d8909xtqlzOzCU2UERGRDi70IanocGjEw61si4iItGMtOYfRHIthL
RERaWdiGRgew1oiItLOtPQGShLSZfe9S8HOMrJ7pfPw7OPaujkiIq0Wy8CoiGGtDq9gZxlrt5e0dTNERGIm9CEpMzvRzLoEr79mZreZ2dDa+e4+rZH1ZprZR2a22syub2D+KWb2vplVmdlF9eZdbmafBI/Lw/9YIiISay05h/EnoNTMjgL+A1hPM7dnDTr33UlkWJHxwKVmNr7eYhuAK4C/1lu3N3AjcBwwFbjRzHq1oL0iIhJDLQmMqmCY8/OBO9z9DqBbM+tMBVa7e767VwCPBevXiRoqvabeumcDL7l7kbvvBF4CZragvSIiEkMtCYxiM7sB+Brwr2DvIbmZdQYBG6PeFwTTwmjNuiIiEmMtCYxZQDkw2923EPnwvqWZdRrqmxH28ttQ65rZHDPLM7O8bdu2hSwtIiIt1ZKe3lvc/TZ3fzN4v8HdmzyHQWSvYHDU+2xgU8hNhlrX3e929xx3z8nKygpZWkREWirM4INvBc/FUYMP7ql938zqucBoMxtuZinAJcDckG2bB5xlZr2Ck91nBdNERKQNNNsPw91PCp6bO8Hd0LpVZnYNkQ/6ROB+d19uZjcBee4+N7jt61NAL+ALZvZzd5/g7kVm9gsioQNwk7sXtbQNIiISG3Hv6e3uzwHP1Zv206jXuUQONzW07v3A/XFtoIiIhBLLsaREROQwpsAQEZFQFBgiIhJKpwyMbcXl7CmrBKCkvIqq6vqdzEVEpL5OFxh3vb6GE25+hR0lkcF1C4vLmX7rfJZv2t3GLRMRad86VWD884NP+fXzq6is3r/DeMHOMr5+33vsLq1so5aJiLR/nSow/jR/TaPzdpRU8MSijY3OFxHp7DpNYOzZV8mqLcVNLvPeWvULFBFpTKcJjKSEhsYy3F9yYqf5dYiItFin+YTMSEnihJGZTS5TXePU1IQdTFdEpHPpNIEB8L0ZY5rc03hh+Ra++cgiivfp5LeISH2dKjCmDu/NvZfnMDQzY7/p4wd0JyU4HPXSiq2cf+fbrC7c2xZNFBFptzpVYABMH9uX134wnYE90gDI7pXOc989mSe/dXzdtPxtJVxw59u8uHxLWzZVRKRd6XSBAZCQYKQmJwKfneielN2TudeexLQRvQHYW17FnIcXcduLH7XL8xqX3fcup906n8vue7etmyIinUSnDIzG9OmayiOzj2P2ScPrpv3+1dX820N57C5rX+c1CnaWsXZ7CQU7y9q6KSLSSSgw6klKTOC/zx3P7bOOJjUp8ut5dVUhF9z5Np9sbbofh4jI4UyB0YgLJg/i7986gUE90wFYuz1yXuOFZZvbuGUiIm1DgdGEiYN68My1J3HiqEj/jZKKaq565H1++8IqqtvheQ0RkXhSYDSjd5cU/vKNqXzzlBF10/44fw1XPpirwQpFpFNRYISQlJjADeeM4/eXTiYtOfIre/3jbXzhD2+xasueNm6diMihocBogfOOGshTV5/IkN6Rjn8bikq58M4FPLt0Uxu3TEQk/hQYLTRuQHfmXnMip4zJAqCssppr/rqYXz+3UnfuE5HDmgLjIPTMSOGBK47l6ukj66bd9UY+VzyQy86SCopKKurGo9pXWY27TpCLSMenwDhIiQnGf8w8gj9+9RgyUiK9xt9avZ3pt85n6v+8zPa9kVvAbt69jwv+uIBPd6mDnYh0bAqMVjrnyAE8/e0TGRYMaLi7rJKqereAXbJxF5ff/x4VVTpkJSIdlwIjBsb068ZTV59Y1zO8IasL9zJPgxmKSAemwIiR3WWVlDezB/HKqq2HqDUiIrGX1NYNOFwkhrgF7NOLN7F+RykzJ/Tn7An9GdanyyFomYhIbCgwYiS7VzojsrqQv62kyeUWb9jF4g27+PXzqziifzfOntCfmRP7c0T/bpg1HzoiIm1FgREjZsb3ZozhO48ubnD+oJ7pDOmdwXvriurGoVq1pZhVW4q545VPGJqZwcwJ/TlrQn8mD+5JQog9lni67L53KdhZRnavdB6ef
VybtkVE2oe4B4aZzQTuABKBe9395nrzU4GHgCnADmCWu68zs2HASuCjYNGF7n5VvNvbGucdNZCyiipufn4VO6PGmZoxri+3XHQUvbqksLOkgpdXbmXe8i288cn2uiun1u8o5a438rnrjXz6dkut2/OYOrx33U2eIHJjp7/lbmTz7shlujv2lrNhRylD6t12trVq77cRa6sLi7n3zbUsWLODBIOTR2fxbycPZ2imDs+JtHcWz05lZpYIfAycCRQAucCl7r4iapmrgUnufpWZXQJc6O6zgsB41t0nht1eTk6O5+XlhVr2tFvns3Z7CcP7dOG1H04Pu4lQ9lVWc+otr7F1TznZvdJ56z9Pb3C5veVVzP+okBeWbeG1VYWUVFQfsEzPjGRmjOvH2RP6M6ZfV654IPeAD/K05ATuviynrvd5a+wuq+SJvI3c+uJH7KusoWdGMi9ddypZ3VJbXXvB6u1c+Zdc9lXuf3FA19QkHp49lclDerV6GyLScma2yN1zmlsu3nsYU4HV7p4fNOox4HxgRdQy5wM/C14/CfzBOvjB/LTkRDJSkoDy/fYO6uuamsS5kwZy7qSB7KusZsGa7bywbAsvrdhat4eyq7SSJxcV8OSiAhLNqG4g4PdV1nDto4tZcP3pdEk9+H/SDTtKufSehft1MtxVWsmZt73Ow7OP48jsHgddu7K6huse/+CAsIBIcP7giSW8fN2pbX4oTkQaF+/AGARsjHpfANQ/IF63jLtXmdluIDOYN9zMFgN7gJ+4+5txbm+bSUtO5PQj+nH6Ef2oqq4hd91O5i3fwgvLtrBlzz6ABsOi1u6ySuY8lMekwT3JSE4kPSUSWhkpta8jj/TkpM9eB8vUXuF13eMfNNgjfVdZJVf/dRHzf3gaCQblVTWUVlRTUl4Vea6oorQ8eK6ooqS8ev/nimryt+1l657yRtufv62EvPU7mTq8dyt/kyISL/EOjIa+Ltb/1Gtsmc3AEHffYWZTgKfNbIK77zeeuJnNAeYADBkyJAZNbntJiQkcPzKT40dm8tNzx7P009088NZa/rmk6VFx316zg7fX7Gjx9lKSEkhNSqB4X1Wjy2wsKmPiz+ZRUVUTt5tH1Z6XEZH2Kd4d9wqAwVHvs4H6n3p1y5hZEtADKHL3cnffAeDui4A1wJj6G3D3u909x91zsrJafwy/vUlIMI4e3JPvzBgdt21UVNU0GRa1yiqq43qnwVWbi6nRnQxF2q1472HkAqPNbDjwKXAJ8JV6y8wFLgfeAS4CXnV3N7MsIsFRbWYjgNFAfpzb226NzOrKUYN7smTjrgbnJyXAE1edQFpyIqUV1ZRVRA4HlVVWU1pRHUyrinpdTWnlZ9MKi8tZXbi3yTYM79OFwb0z6BIcyuqSGjynJJKRmkTX1PrTk8hITaRLShIpScbM29+gsLii0fp/en0N7+Tv4KbzJzApu2erfl8iEntxDYzgnMQ1wDwil9Xe7+7LzewmIM/d5wL3AQ+b2WqgiEioAJwC3GRmVUA1cJW7F8Wzve3dry6YyKy736Gk/MCrqX7y+fGtusqousY59ZbXKNjZ8GGhrqlJPHvtSa06qX77rMkNXiWVlGDUuFPj8MHGXZx/59tccuwQfnT2WHp3STno7YlIbMV9LCl3f87dx7j7SHf/VTDtp0FY4O773P1idx/l7lNrr6hy97+7+wR3P8rdj3H3Z+Ld1vZu4qAe/PPbJ/HFYwZRex1ZWnIC91+RwxUnDm9V7cQE438uPJLkxANPKRnw8/MmtCosAE4Y1Ydnrz2JS44dTGpSAkkJxoAeabzyg1N59tqTOXZYJPDc4dH3NnD67+bzyML1cT0MJiLhqad3BzOqb1du+/LRLN6wi7XbSxjQI53Tj+gXk9qnjMniiatO4A+vrubllZGBEtOSE7jn6zmcPDo254dG9e3GzV+axM1fmnTAvMe/eTz//GATv3puJduKy9lVWslPnl7Go+9t4KbzJzBlqK6gEmlLGq1W9
nP04J7ce3lO3f09BvRIj1lYNMfMuGDyIF79wanMOWUEScHlvss37eFLf3qHHzy+hMLifYekLSJyIAWGNKgt+052S0vmv84ZxwvfO5kTR2XWTf/7+wWccevr3PfWWip1/3SRQ06BIQ3K7pXO8D5dyO6V3mZtGNW3G4/MPo4/fvUYBvZIA6C4vIpfPLuCz//+TRas2d5mbRPpjHQOQxrUXkaoNTPOOXIA08dm8cfX1nD3G/lUVNfw8da9fOWedzl30gB+/PlxDOjRdsEm0lkoMKRDyEhJ4odnj+WiKdn8/JnlvPbRNgCeXbqZV1cVcu3po7nypGGUV9Xw90UF5K3bSXKiMWN8ZODGpsb0EpFwFBjSoQzr04UHvjGVV1Zu5efPrGBDUSmlFdX85oVVPLJwPcX7KtkT1Wv96Q82MSm7Bw9dOZWeGerTIdIa+tolHdIZ4/rx4nWn8IMzx5CWHPlv/Omusv3CotbSgt38+Ollh7qJIocdBYZ0WGnJiVx7xmhe/v6pTB7c9FAiz3+4ma17dEmuSGsoMKTDy+6Vwaljm+4rUuPwq3+t5K1PtlPWwI2qRKR5Oochh4XuacnNLjN3ySbmLtlEcmJkBODjR2QybWQmxwzpRVpy4iFopUjHpsCQw8I5Rw7gV8+tDDXuVGW1k7tuJ7nrdvL7V1eTkpTA5ME9OX5kJtNGZDJ5SE9SkxoOkD37Knl68acs2bibjJREPjexP8ePzGzTjo4ih4oCo4Oq7VDXlh3r2pP+PdL4zumj+X8vf3zAvKQE486vTCYjNYl31uxgYf4OlhbspioIl4qqGt5dW8S7a4uAT0hNSmDK0F51eyBHZfckJSmBxRt2MvvBPIpKPxui/eGF6znjiL7c+dVjtJcihz0FRgfVXjrWtSffOWMUA3qk8efX15C/vQSA44b35rozxzBtRGSIkdpxsUrKq8hdV8TC/CLeyd/BhwW7qN05Ka+qYcGaHSxYswNegvTkRCYP6cniDbsoqzzw/Mcrqwq5dd5H/OTc8YfmBxVpIwoMOWyYGV8+djAX52RTVFJBclJCo+c2uqQmMX1sX6aP7QtA8b7KzwJkzQ6Wb9pdFyBlldWR8GjCY7kb+f5ZY8hI0Z+UHL70v1sOO2ZGZtfUFq3TLS2Z04/oVzdU/O6ySnLXRvY+3lmzgxWb9zS5/t7yKm5/+RPOP3og4/p3JyFB5zTk8KPAEGlAj/RkZozvx4zxkQD5+TPLeeDtdU2uc/cb+dz9Rj49M5KZNjyT40dmcsLITEb17aqT4nJYUGCIhPClY7KbDYxau0oreWH5Fl5YvgWAPl1TmTaiNyeM7MPxIzMZlpnRaICUV1Xz4vKtrNy8hx7pyZxz5AAG986I1Y8h0ioKDJEQJg7qwcVTsnliUcEB85ITjf+9dDLlVTUszI+cLF+/o7Ru/va95Ty7dDPPLt0MwIAeaXVXYJ0wMpPsXpFA+LBgN//+UB5bonqk3/z8Kr41fSQ/OntszPZSyquq2b63gu5pSXQL0X9FpJYCQySkm780iaGZGTy4YD3b95YDMHVYb340cyzHDovcPvb8owcBkXGt3lmzo+4y3k93ldXV2bx7H/9Y/Cn/WPwpAIN7pzNlSG9eWrmFkvL9r8Jy4I/z1zCoVzpfPW5oq9pfWlHF7178mL/lbmRveRWJCcaZ4/pxwzlHMDSzS6tqS+egwBAJKTHBuOb00Vx16kg2795HekoifRo5uT6oZzoXTcnmoinZuDsbikojARLsgWwrLq9bdmNRGRuLPm1y23e8/Anj+nene3py3Z5BWnJC6L2Oyuoarnggl/fWFtVNq65xXli+hbz1RTz97RPr9nREGqPAEGmhpMSEFp1XMDOGZnZhaGYXLpk6BHdnzbaS4Aqs7SzML6KopKLJGoXF5XzxTwv2b0eC0S0Ij8jzZ6+7p30WLN3Skli1pXi/sIi2fW8F//vKan5z0aTQP5N0TgoMkUPMzBjVtyuj+nblsmlDqalx/v2hPF5ZV
diiOlU1zs7SSnaWVra6TU8tLuDSqYM5YkB39ViXRikwRNpYQoJxcU52k4ExNDODL07OpnhfJcX7qtgTPH/2PvK6vKrmoNpQUe1c8McFJBiMzOrKuAHdGT+we+R5QHeyuoXr17K6sJi/5W5kY1EZ/XukcdGUbCYO6nFQbZL2R4Eh0g7MGNePKUN7sWj9zgPmJSUYt158VN2J9aZUVNXUhUhtoOzZV8XDC9fx9uqme6tDZBj4Twr38knhXuYu2VQ3Patbal14jBvQjQkDuzO8T1cSozoo3vtmPr/610qih398cME6rjltFD88e2yz25b2T4Eh0g4kJSbwwDeO5Wf/XM7cJZvqBkYc3bcrN35hQqiwAEhJSiCza+oBPd1H9e3KzNvfqKtb36ycbHpkpLBi0x5WbN5zwDmVbcXlbCvexhsfb6ublpacwNh+3Rg/sDvpyUnc//baBmv/4bXVTBzUg5kT+4f6GaT9UmDEiUaTlZbqnpbMbbOO5sefH8fqwr30yEhmbL9uMel/MapvV26bdTQ/ePwDKqv3D40rThjGjV8YX7cdd6ewuLwuPFZs3sPKzXtYu70Ej1p1X2UNSwp2s6Rgd7Pb/8uCdQqMw4ACI040mqwcrIb2EGLhvKMGkjO0F0/kFbBm214yu6Zw4eRBTMre//a2Zka/7mn0657GaUf0rZteWlHFqi3FrNy8hxWbIiGycnNxgyP41pe3voh738xn2ohMxg3ovt+hLOk4FBgincjAnul8d8bog1o3IyWJY4b04pghveqmVdc463eUcOVfclm3vbTRdSurnV/+ayUA3dKSOG54b6aNyFSAdDAKDBE5aIkJxoisrnx92jBuenZFqHWK91Xx8spCXl4ZuSqsJQHy/oadPJFXwNY9+xjSO4Mv5wxm/MDuMflZ3J138nfw1PufUlRSwYisSL+ZkVldY1K/psaZ/3EhzyzZzJ6ySsYN6M4lUwfHrMNkVXUNL67YyvPLtlBWUcVR2T2Zdexg+nZPi0l9AHNv/paWrdqA2UzgDiARuNfdb643PxV4CJgC7ABmufu6YN4NwGygGviOu89rals5OTmel5cXql2n3TqftdtLGN6nC6/9cHqLfiYR2V9ZRTUX37WAZZ8eOAz88D5duOfrU1i5uZiF+ZGhUtZsK2m0Vre0JKYO+yxAxg/sToLB/zy3knvePPDE+n/OPIJvTR/ZqvbX1Dg/enIpf39//7HCEgx+/cUjmXXskFbVr6iq4er/W1QXkrVqxyGbOXFAq+rvLa/iygdyeW/d/p0zM1ISuffrOZwwqk+T65vZInfPaW47cQ0MM0sEPgbOBAqAXOBSd18RtczVwCR3v8rMLgEudPdZZjYeeBSYCgwEXgbGuHujB0wVGCJtZ8++Sm578WOeXFTA3vIqDOjbPZVnrz35gH4chcX7eDe/KHSADO2dwbJNjd+T5NF/n8bxIzMPuu1/WbCOG+cub3BegsHz3z2Fsf27HXT92178iN+/urrBeSlJCbz+o+kM6HHwF8j811Mf8td3NzQ4r3taEm9df3qjNxOD9hMYxwM/c/ezg/c3ALj7r6OWmRcs846ZJQFbgCzg+uhlo5drbHsKDJG2V1FVw66yCnqkJ5OaFK7XeGHxPt5bWxsgRawu3NuibfbvnsYxQ3s2v2AjXv9oGyUVjZ+8H9o7gwmDDu7QV43Dqyu3UlHd+Gft6L5dGd3v4A59VVY7r6zcSiNXTAPwywsm8rVpjQ9eGTYw4n0OYxCwMep9AVD/8qG6Zdy9ysx2A5nB9IX11h0Uv6aKSCykJCXQt1vLjpv37ZbGuZMGcu6kgUCk38e7a3eEDpAte/bx3IdbDrrNzVlfVMr6osZP6rdWbWfJePl4a3FM6sQ7MBo6c1U/BxtbJsy6mNkcYA7AkCGtO86o64yRAAAKnklEQVQoIu1DVrfU/QLk5N+8ysadZc2sJY3pmR6b+57EOzAKgMFR77OBTY0sUxAckuoBFIVcF3e/G7gbIoekw
jZMHetEOo4LJw9q9BwAwH+fO65V9wv52dzlPJa7sdH5t886ulUdD6/962JeWrm10fkPXTmVqcPD9eZvyFfvWciiDbsanf+FowYedO1o8Q6MXGC0mQ0HPgUuAb5Sb5m5wOXAO8BFwKvu7mY2F/irmd1G5KT3aOC9WDVMHetEOo7ZJ4/guWVbGjw0NXlIT7563NBWjbJ73ZljeP3jbWzeve+AeaeOyeILRw1sVV+RG845gtx1RewqO3Bk4QuOHsjJo/u0qkf/jedN4JK7F1LawHmY2ScNZ3S/gz9hHy0hJlUa4e5VwDXAPGAl8Li7Lzezm8zsvGCx+4BMM1sNfJ/PTnYvBx4HVgAvAN9u6gopETl89UhP5vFvHs9l04bSJSUSDD3Tk5lzyggenn1cq4dk79c9jX9cfQIXTckmNSnysdinayrfPWM0d399Sqs7Fo7I6spT3z6RcycNIDkxUmtQz3Ru+NwR/O7LR7d6+JdJ2T158qoTmDGuH7VNHd6nC7+8YCI/+fy4VtWOFvd+GIdSS66SEpGOqaq6hpLyarqmJcWlh3hldQ2lFdV0S00iIQ71K6pqKKuspntaUszu0x5tX2U1FdU1dEsNX7+9XCUlIhJTSYkJ9MiI38GR5MQEeqTHr35KUgIpSfGrn5acGLebYMX1kJSIiBw+FBgiIhKKAkNEREJRYIiISCgKDBERCUWBISIioSgwREQklMOq456ZbQPWt2CVPsD2ODVH9VVf9eNXvyO3vT3WH+ruWc0tdFgFRkuZWV6Y3o2qr/qq377qd+S2d+T6OiQlIiKhKDBERCSUzh4Yd6u+6qt+h6zfkdveYet36nMYIiISXmffwxARkZA6bWCY2Uwz+8jMVpvZ9TGufb+ZFZrZsljWjao/2MxeM7OVZrbczL4b4/ppZvaemS0J6v88lvWDbSSa2WIzezbWtYP668zsQzP7wMxiepMUM+tpZk+a2arg3+D4GNYeG7S59rHHzL4Xq/rBNq4L/l2XmdmjZpYW4/rfDWovj0XbG/p7MrPeZvaSmX0SPPeKcf2Lg/bXmFmrrjZq7PPAzK4NPoOWm9lvY9z+v0X9H1pnZh+05meo4+6d7gEkAmuAEUAKsAQYH8P6pwDHAMvi1P4BwDHB627AxzFuvwFdg9fJwLvAtBj/DN8H/go8G6ff0TqgT5xq/wX4t+B1CtAzTttJBLYQuUY+VjUHAWuB9OD948AVMaw/EVgGZBC5387LwOhW1jzg7wn4LXB98Pp64Dcxrj8OGAvMB3Li0P7Tgt9NavC+byzr15v/O+Cnsfj37ax7GFOB1e6e7+4VwGPA+bEq7u5vAEWxqtdA/c3u/n7wupjI7W8HxbC+u3vtzZOTg0fMTnaZWTbweeDeWNU8VMysO5E/0PsA3L3C3XfFaXNnAGvcvSWdUcNIAtLNLInIB/umGNYeByx091KP3KL5deDC1hRs5O/pfCLBTfB8QSzru/tKd//oYGs2Vx/4FnCzu5cHyxTGuD4AFrnl3peBRw+2frTOGhiDgI1R7wuI4QfuoWRmw4DJRPYCYlk3MdiNLQRecvdY1r8d+A+gJoY163PgRTNbZGZzYlh3BLANeCA4pHavmXWJYf1olxCjP/Ra7v4pcCuwAdgM7Hb3F2O4iWXAKWaWaWYZwDnA4BjWr9XP3TdD5AsU0DcO24inMcDJZvaumb1uZsfGaTsnA1vd/ZNYFOusgdHQjW473OViZtYV+DvwPXffE8va7l7t7kcD2cBUM5sYi7pmdi5Q6O6LYlGvCSe6+zHA54Bvm9kpMaqbRGT3/0/uPhkoIXJIJKbMLAU4D3gixnV7Efl2PhwYCHQxs6/Fqr67rwR+A7wEvEDkcG9VrOofRpKAXsA04EfA4xaPG3zDpcTwS0dnDYwC9v/Wk01sd8vjzsySiYTF/7n7P+K1neBwy3xgZoxKngicZ2briBwKPN3MHolR7Truvil4LgSeInIYMhYKgIKoPa4niQRIrH0OeN/dt8a47
gxgrbtvc/dK4B/ACbHcgLvf5+7HuPspRA6VxOTbbT1bzWwAQPB80Id02kgB8I/g8O97RPa2+8RyA8Ehxy8Cf4tVzc4aGLnAaDMbHnyTuwSY28ZtCi34JnIfsNLdb4tD/Swz6xm8TifyIbMqFrXd/QZ3z3b3YUR+76+6e8y+4QKYWRcz61b7GjiLyKGSVnP3LcBGMxsbTDoDWBGL2vXE9JthlA3ANDPLCP4fnUHkHFjMmFnf4HkIkQ+sePwcc4HLg9eXA/+Mwzbi6WngdAAzG0Pk4olYD0Y4A1jl7gUxqxiLM+cd8UHk2OrHRK6W+nGMaz9K5PhwJZFvErNjXP8kIofQlgIfBI9zYlh/ErA4qL+MGF1h0cB2phOHq6SInGdYEjyWx+Hf92ggL/j9PA30inH9DGAH0CNOv/efE/kCsAx4mOBKnRjWf5NIiC4BzohBvQP+noBM4BUiey+vAL1jXP/C4HU5sBWYF+P6KcAjwb/B+8DpsawfTH8QuCqW/7bq6S0iIqF01kNSIiLSQgoMEREJRYEhIiKhKDBERCQUBYaIiISiwBARkVAUGHLYMLMFwfMwM/tKW7cHwMwGmtmTMaz3oJldFKt68aophycFhhw23L12iIthQJsHhpklufsmd9eHsRwWFBhy2DCz2iHZbyYyEugHwc2CEs3sFjPLNbOlZvbNYPnpwUihj5vZx2Z2s5l91SI3j/rQzEY2sa0HzezPZvZmsO65wfQrzOwJM3uGyGi5w2pvbBO049ag9lIzuzaYPiVoxyIzm1c7RlKIn/eA9cxsnJm9F7XMMDNb2prtiNRKausGiMTB9cAP3b32Q3wOkWG8jzWzVOBtM6sd0vsoIvdwKALygXvdfapF7mJ4LdDUHeOGAacCI4HXzGxUMP14YJK7FwXDz9eaQ2SU2MnuXmWRu8YlA/8LnO/u28xsFvAr4MqmfsDG1nP3K80sxcxGuHs+MIvISKgHtR2RaAoM6QzOAiZFHafvAYwGKoBcD+6rYGZrgNog+ZDIXdGa8ri71wCfmFk+cEQw/SV3b+iGNjOAP3vkxkIEgTKRyF3qXgpGt04kMi5Qc8Y2sd7jRG6aczORwJjVzPIioSgwpDMw4Fp3n7ffRLPpRAaXq1UT9b6G5v8+6g/EVvu+pIl21F/HgOXu3tL7gje13t+AJ8zsH0RuoPiJmR15kNsRqaNzGHI4KiZyr/Na84BvBYdlMLMxFpu75F1sZgnBuY4RQHO39HwRuCq4TwFm1jtYJ8vMjg+mJZvZhBDbbnQ9d18DVAP/zWf3QjjY7YjU0R6GHI6WAlVmtoTIEM93EDnf8H5wD4httOIe0FE+InLP6n5EhpHeZ03fNO1eIrfmXGpmlcA97v6H4FDZ782sB5G/yduJDMveKHevaGa9vwG3EDlnEmZ5kWZpeHORg2BmDxK5l0fM+liItHc6JCUiIqHokJRIE8zsx8DF9SY/4e5XxHm7dxK5/3m0O9z9gXhuV6QpOiQlIiKh6JCUiIiEosAQEZFQFBgiIhKKAkNEREJRYIiISCj/H7EKNzZzNgEFAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# item_price_level:\n", "# we can see that is_trade decreasing with item_price_level increasing.\n", "# the higher the price, the less trade probability\n", "\n", "# item_sales_level:\n", "# in general, the higher the item_sales_level, the higher trade probability\n", "\n", "# item_collected_level: \n", "# in general, the higher the item_collected_level, the higher trade probability\n", "\n", "# item_pv_level: \n", "# extrordinarly high in 3 level. is_trade subtly increasing with item_pv_level\n", "\n", "sns.pointplot(x='item_price_level', y='is_trade', data=train)\n", "#sns.pointplot(x='item_sales_level', y='is_trade', data=train)\n", "#sns.pointplot(x='item_collected_level', y='is_trade', data=train)\n", "#sns.pointplot(x='item_pv_level', y='is_trade', data=train)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# user_age_level:\n", "# is_trade increase with user_age_level increase\n", "\n", "# user_star_level: \n", "# subtly increase as user_start_level increase, and extraordinarly large in 3010 \n", "sns.pointplot(x='user_age_level', y='is_trade', data=train)\n", "#sns.pointplot(x='user_star_level', y='is_trade', data=train)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.pointplot(x='shop_review_num_level', y='is_trade', data=train)\n", "sns.pointplot(x='shop_star_level', y='is_trade', data=train)\n", "\n", "# the remaining four plots are too intensive, and are not discrimincative\n", "# thus, we want to divide them into different section\n", "\n", "# sns.pointplot(x='shop_review_positive_rate', y='is_trade', data=train)\n", "# sns.pointplot(x='shop_score_service', y='is_trade', data=train)\n", "# sns.pointplot(x='shop_score_delivery', y='is_trade', data=train)\n", "# sns.pointplot(x='shop_score_description', y='is_trade', data=train)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "shop_score_description 0.965535\n", "Name: 0.2, dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train[['shop_score_description']].quantile(0.20)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"\\ndef f(x):\\n if x > 0.984:\\n return 2\\n elif x > 0.97 and x <= 0.984:\\n return 1\\n else:\\n return 0\\ntrain['shop_score_description'] = train'shop_score_description'].apply(f)\\nsns.pointplot(x = 'shop_score_description', y = 'is_trade', data=train)\\n\"" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "def f(x):\n", " if x > 0.984:\n", " return 2\n", " elif x > 0.97 and x <= 0.984:\n", " return 1\n", " else:\n", " return 0\n", "train['shop_score_description'] = train'shop_score_description'].apply(f)\n", "sns.pointplot(x = 'shop_score_description', y = 'is_trade', data=train)\n", "'''" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "train.to_csv('data/complete_train.csv', index = None)\n", "test.to_csv('data/test.csv',index = None)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "Index(['instance_id', 'item_id', 'item_property_list', 'item_brand_id',\n", " 'item_city_id', 'item_price_level', 'item_sales_level',\n", " 'item_collected_level', 'item_pv_level', 'user_id', 'user_gender_id',\n", " 'user_age_level', 'user_occupation_id', 'user_star_level', 'context_id',\n", " 'context_timestamp', 'context_page_id', 'shop_id',\n", " 'shop_review_num_level', 'shop_review_positive_rate', 'shop_star_level',\n", " 'shop_score_service', 'shop_score_delivery', 'shop_score_description',\n", " 'is_trade', 'datetime', 'day', 'hour', 'time', 'is_midnight',\n", " 'is_morning', 'is_afternoon', 'is_night', 'item_category',\n", " 'cate_precision', 'cate_recall', 'prop_precision', 'prop_recall'],\n", " dtype='object')" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.columns" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
instance_iditem_iditem_property_listitem_brand_iditem_city_iditem_price_levelitem_sales_levelitem_collected_levelitem_pv_leveluser_id...timeis_midnightis_morningis_afternoonis_nightitem_categorycate_precisioncate_recallprop_precisionprop_recall
010864107471412696434127203770986760692072967855524022579;5131280576272319091;263639...19755904377490328703948283326616421003334144505772604969228686...22:09:04000157993470679825565200.4000001.00.00.000000
1575471355159972516134127203770986760692072967855524022579;5131280576272319091;263639...19755904377490328703948283326616421003334142692638157208937547...00:00:32100057993470679825565201.0000001.00.20.045455
284267948129104098134127203770986760692072967855524022579;5131280576272319091;263639...19755904377490328703948283326616421003334145247924392014515924...15:04:12001057993470679825565200.6666671.01.00.045455
393708885005918902734127203770986760692072967855524022579;5131280576272319091;263639...19755904377490328703948283326616421003334142681414445369714628...18:17:50001057993470679825565200.4000001.00.00.000000
4797569706501770807234127203770986760692072967855524022579;5131280576272319091;263639...19755904377490328703948283326616421003334142729475788342039013...07:48:40100057993470679825565201.0000001.01.00.045455
\n", "

5 rows × 38 columns

\n", "
" ], "text/plain": [ " instance_id item_id \\\n", "0 108641074714126964 3412720377098676069 \n", "1 5754713551599725161 3412720377098676069 \n", "2 842679481291040981 3412720377098676069 \n", "3 937088850059189027 3412720377098676069 \n", "4 7975697065017708072 3412720377098676069 \n", "\n", " item_property_list item_brand_id \\\n", "0 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "1 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "2 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "3 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "4 2072967855524022579;5131280576272319091;263639... 1975590437749032870 \n", "\n", " item_city_id item_price_level item_sales_level \\\n", "0 3948283326616421003 3 3 \n", "1 3948283326616421003 3 3 \n", "2 3948283326616421003 3 3 \n", "3 3948283326616421003 3 3 \n", "4 3948283326616421003 3 3 \n", "\n", " item_collected_level item_pv_level user_id ... \\\n", "0 4 14 4505772604969228686 ... \n", "1 4 14 2692638157208937547 ... \n", "2 4 14 5247924392014515924 ... \n", "3 4 14 2681414445369714628 ... \n", "4 4 14 2729475788342039013 ... 
\n", "\n", " time is_midnight is_morning is_afternoon is_night \\\n", "0 22:09:04 0 0 0 1 \n", "1 00:00:32 1 0 0 0 \n", "2 15:04:12 0 0 1 0 \n", "3 18:17:50 0 0 1 0 \n", "4 07:48:40 1 0 0 0 \n", "\n", " item_category cate_precision cate_recall prop_precision \\\n", "0 5799347067982556520 0.400000 1.0 0.0 \n", "1 5799347067982556520 1.000000 1.0 0.2 \n", "2 5799347067982556520 0.666667 1.0 1.0 \n", "3 5799347067982556520 0.400000 1.0 0.0 \n", "4 5799347067982556520 1.000000 1.0 1.0 \n", "\n", " prop_recall \n", "0 0.000000 \n", "1 0.045455 \n", "2 0.045455 \n", "3 0.000000 \n", "4 0.045455 \n", "\n", "[5 rows x 38 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(478111, 38)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.shape" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(61259, 37)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }