{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "spend_health.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyNH4+1exhSCGhK18PO1txk5",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/ohdata1/ohdata1.github.io/blob/main/spend_health.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1kpnj75N98Jy"
      },
      "outputs": [],
      "source": [
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df1 = pd.read_csv('life_expec.csv')"
      ],
      "metadata": {
        "id": "gpTfu13C9_lx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#Turn the wide-form data into long-form\n",
        "df1 = df1.melt(id_vars=[\"iso\"], \n",
        "                  var_name=\"year\", value_name=\"value\")"
      ],
      "metadata": {
        "id": "opbeWbpf-L6u"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df1['year'] = pd.to_datetime(df1['year'], format='%Y')"
      ],
      "metadata": {
        "id": "H0XhTpAPT33c"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#Filter the data to one year\n",
        "df1 =df1[(df1['year'] == '2019-01-01')]"
      ],
      "metadata": {
        "id": "sy6ZsMnKUjz-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df2 = pd.read_csv('gov_spend.csv')"
      ],
      "metadata": {
        "id": "nFzug5Qb_Z7F"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#merge the two dataframes\n",
        "df = pd.merge(df1,df2)"
      ],
      "metadata": {
        "id": "hIcuiRHxAvPu"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import requests\n",
        "%matplotlib inline\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns; sns.set()\n",
        "import numpy as np\n",
        "from sklearn.linear_model import LinearRegression\n",
        "from sklearn.preprocessing import PolynomialFeatures\n",
        "from sklearn.pipeline import make_pipeline\n",
        "from sklearn.metrics import r2_score"
      ],
      "metadata": {
        "id": "ocXQtRgiUydS"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# remove n/a values so regression is possible\n",
        "x = df.dropna()['average']\n",
        "y = df.dropna()['value']\n",
        "\n",
        "poly_model = make_pipeline(PolynomialFeatures(2),\n",
        "                           LinearRegression())\n",
        "poly_model.fit(x[:, np.newaxis], y)\n",
        "xfit = np.linspace(min(x), max(x), 1000)\n",
        "yfit = poly_model.predict(xfit[:, np.newaxis])\n",
        "\n",
        "\n",
        "plt.rcParams['axes.facecolor'] = '#DFDEDE'\n",
        "plt.scatter(x, y, color ='#0000FF')\n",
        "plt.plot(xfit, yfit,color='crimson');\n",
        "plt.xlabel('Government spending as % of GDP', x=0.5, y=0.1)\n",
        "plt.ylabel('Life expectancy at birth')\n",
        "plt.title('How does public spending affect health outcomes', x=0.5, y=1, fontweight = \"bold\")\n",
        "plt.suptitle('Sources: World Bank and ICTD', x=0.5, y=0.89, fontsize=9)\n",
        "plt.savefig('spend_health.png', dpi=300, bbox_inches = \"tight\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 436
        },
        "id": "mK7p9OmPVBdo",
        "outputId": "a7f7b550-ab9e-46c2-d359-1e3a55019367"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:6: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.\n",
            "  \n"
          ]
        },
        {
          "output_type": "error",
          "ename": "ValueError",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-16-365c25f5d357>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mxfit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinspace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0myfit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoly_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxfit\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnewaxis\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mr2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mr2_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myfit\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_regression.py\u001b[0m in \u001b[0;36mr2_score\u001b[0;34m(y_true, y_pred, sample_weight, multioutput)\u001b[0m\n\u001b[1;32m    773\u001b[0m     \"\"\"\n\u001b[1;32m    774\u001b[0m     y_type, y_true, y_pred, multioutput = _check_reg_targets(\n\u001b[0;32m--> 775\u001b[0;31m         \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmultioutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    776\u001b[0m     )\n\u001b[1;32m    777\u001b[0m     \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_regression.py\u001b[0m in \u001b[0;36m_check_reg_targets\u001b[0;34m(y_true, y_pred, multioutput, dtype)\u001b[0m\n\u001b[1;32m     87\u001b[0m         \u001b[0mthe\u001b[0m \u001b[0mdtype\u001b[0m \u001b[0margument\u001b[0m \u001b[0mpassed\u001b[0m \u001b[0mto\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     88\u001b[0m     \"\"\"\n\u001b[0;32m---> 89\u001b[0;31m     \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     90\u001b[0m     \u001b[0my_true\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     91\u001b[0m     \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_consistent_length\u001b[0;34m(*arrays)\u001b[0m\n\u001b[1;32m    331\u001b[0m         raise ValueError(\n\u001b[1;32m    332\u001b[0m             \u001b[0;34m\"Found input variables with inconsistent numbers of samples: %r\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 333\u001b[0;31m             \u001b[0;34m%\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ml\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlengths\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    334\u001b[0m         )\n\u001b[1;32m    335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mValueError\u001b[0m: Found input variables with inconsistent numbers of samples: [167, 1000]"
          ]
        }
      ]
    }
  ]
}