import ast
from collections import Counter

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
| |
|
| |
|
# Rename the coordinate columns to the short names used by the rest of the
# module. The same mapping is applied to both files.
_COORD_RENAMES = {"latitude": "lat", "longitude": "lon"}

df_amenities = pd.read_csv("df_indonesia.csv").rename(columns=_COORD_RENAMES)
df_banks = pd.read_csv("df_bank_indonesia.csv").rename(columns=_COORD_RENAMES)

# Each "fsq_category_labels" cell is parsed from its text form into a Python
# object. ast.literal_eval only accepts literals (lists, strings, numbers, ...),
# so the contents of the CSV cannot execute code while loading, which a plain
# eval() would allow. A cell that is not a valid literal raises
# ValueError/SyntaxError instead of being evaluated.
df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
    ast.literal_eval
)
| |
|
# Coordinate matrices, one (lat, lon) row per record.
bank_coords = df_banks[["lat", "lon"]].to_numpy()
amenity_coords = df_amenities[["lat", "lon"]].to_numpy()

# KD-trees over those matrices, used for radius lookups around a point.
# Row i of a tree corresponds to positional row i of its source DataFrame.
tree_banks = cKDTree(bank_coords)
tree_amenities = cKDTree(amenity_coords)
| |
|
# Category names for which compute_features emits one "num_<name>" count each.
# The order here is the order of those keys in the returned feature dict.
DATASET_COLUMNS = [
    "Dining and Drinking",
    "Community and Government",
    "Retail",
    "Business and Professional Services",
    "Landmarks and Outdoors",
    "Arts and Entertainment",
    "Health and Medicine",
    "Travel and Transportation",
    "Sports and Recreation",
    "Event",
]
| |
|
def compute_features(candidate_point, radius=0.005):
    """Build neighbourhood features for a candidate location.

    Args:
        candidate_point: ``(lat, lon)`` pair of the location to describe.
        radius: Radius handed to the KD-tree ball queries. It is measured in
            the units of the coordinates the module-level trees were built
            from (the raw ``lat``/``lon`` columns), not in metres.

    Returns:
        A dict with, in this order:

        * ``num_banks_in_radius`` -- banks found within ``radius``.
        * ``total_amenities`` -- amenities found within ``radius``.
        * ``category_diversity`` -- number of distinct top-level categories
          among those amenities.
        * ``num_<category>`` -- one count per entry of ``DATASET_COLUMNS``
          (0 when the category does not occur nearby).
    """
    lat, lon = candidate_point
    query_point = [lat, lon]

    n_banks = len(tree_banks.query_ball_point(query_point, r=radius))

    # The tree returns positional row indices, hence iloc.
    amenity_idxs = tree_amenities.query_ball_point(query_point, r=radius)
    amenities = df_amenities.iloc[amenity_idxs]

    # Reduce every amenity to the part of its first label that precedes the
    # first ">" (presumably labels look like "Retail > Clothing" -- verify
    # against the data). Amenities with no labels are skipped here but still
    # count towards total_amenities.
    top_level_categories = [
        cats[0].split(">")[0].strip()
        for cats in amenities["fsq_category_labels"]
        if len(cats) > 0
    ]
    count_per_category = Counter(top_level_categories)

    features = {
        "num_banks_in_radius": n_banks,
        "total_amenities": len(amenities),
        "category_diversity": len(count_per_category),
    }

    # Indexing a Counter with a missing key yields 0 without inserting it.
    for feat in DATASET_COLUMNS:
        features[f"num_{feat}"] = count_per_category[feat]

    return features