{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Clustering using `scikit-learn`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "from sklearn import preprocessing, cluster, metrics\n",
    "from sklearn.pipeline import Pipeline\n",
    "\n",
    "from scipy.spatial.distance import cdist, pdist\n",
    "\n",
    "from fastdtw import fastdtw\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read in the cryptocurrencies dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the cryptocurrencies CSV (one place to change if the data moves).\n",
    "DATA_URL = 'https://raw.githubusercontent.com/estimand/teaching-datasets/master/cryptocurrencies/cryptocurrencies.csv'\n",
    "\n",
    "crypto = pd.read_csv(DATA_URL)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert 'date' to `datetime`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse 'date' so it can be compared against a cutoff and used as the pivot index below.\n",
    "crypto['date'] = pd.to_datetime(crypto['date'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Keep only data from November 2017 onwards."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First date (inclusive) to keep.\n",
    "START_DATE = '2017-11-01'\n",
    "\n",
    "# Take an explicit copy of the filtered rows so that later cells can modify\n",
    "# `crypto` without pandas emitting SettingWithCopyWarning.\n",
    "crypto = crypto.loc[crypto['date'] >= START_DATE].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pivot close prices."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wide table: one row per date, one column per symbol, values are close prices.\n",
    "# Note: pivot_table aggregates with the mean by default, so rows sharing the\n",
    "# same (date, symbol) pair are averaged rather than raising an error.\n",
    "crypto_close = crypto.pivot_table(values='close', index='date', columns='symbol')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
symbol | \n", "BCH | \n", "BTC | \n", "BTG | \n", "DASH | \n", "EOS | \n", "ETC | \n", "ETH | \n", "IOT | \n", "LTC | \n", "NEO | \n", "NXT | \n", "XLM | \n", "XMR | \n", "XRP | \n", "ZEC | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2017-11-01 | \n", "529.88 | \n", "6737.78 | \n", "133.57 | \n", "269.64 | \n", "1.05 | \n", "10.11 | \n", "289.42 | \n", "0.3644 | \n", "52.83 | \n", "26.87 | \n", "0.05576 | \n", "0.02730 | \n", "85.41 | \n", "0.1903 | \n", "221.21 | \n", "
2017-11-02 | \n", "562.79 | \n", "7024.81 | \n", "121.94 | \n", "263.22 | \n", "1.11 | \n", "10.03 | \n", "284.92 | \n", "0.3661 | \n", "54.19 | \n", "24.62 | \n", "0.06052 | \n", "0.02750 | \n", "83.08 | \n", "0.2003 | \n", "216.68 | \n", "
2017-11-03 | \n", "626.04 | \n", "7152.12 | \n", "124.12 | \n", "276.79 | \n", "1.20 | \n", "12.49 | \n", "304.51 | \n", "0.3927 | \n", "55.98 | \n", "26.67 | \n", "0.05766 | \n", "0.02740 | \n", "87.44 | \n", "0.2059 | \n", "231.16 | \n", "
2017-11-04 | \n", "614.26 | \n", "7363.80 | \n", "138.93 | \n", "273.45 | \n", "1.11 | \n", "11.77 | \n", "300.04 | \n", "0.3685 | \n", "54.83 | \n", "26.44 | \n", "0.05610 | \n", "0.02708 | \n", "87.12 | \n", "0.2015 | \n", "229.84 | \n", "
2017-11-05 | \n", "625.72 | \n", "7389.55 | \n", "164.58 | \n", "272.22 | \n", "1.09 | \n", "13.10 | \n", "296.23 | \n", "0.3506 | \n", "54.60 | \n", "26.24 | \n", "0.05714 | \n", "0.02664 | \n", "86.19 | \n", "0.1997 | \n", "224.97 | \n", "