From 27bf16ac1289cde688066a624a264432ba4baffa Mon Sep 17 00:00:00 2001 From: Ben Quigley Date: Tue, 27 Feb 2018 09:38:40 -0500 Subject: IPython notebooks are Jupyter notebooks now IPython still exists, but the notebooks have been spun off into their own project called Jupyter. This file change: * updates the pip install list so that Jupyter gets installed * removes the vague "get set up with IPython" (pip will install IPython with Jupyter), and * replaces the suggestion to work in "the IPython notebook" with "a Jupyter notebook". --- pythonstatcomp.html.markdown | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'pythonstatcomp.html.markdown') diff --git a/pythonstatcomp.html.markdown b/pythonstatcomp.html.markdown index 79bbcd8d..6dde1cf0 100644 --- a/pythonstatcomp.html.markdown +++ b/pythonstatcomp.html.markdown @@ -13,10 +13,11 @@ This is a tutorial on how to do some typical statistical programming tasks using # 0. Getting set up ==== -""" Get set up with IPython and pip install the following: numpy, scipy, pandas, +""" To get started, pip install the following: jupyter, numpy, scipy, pandas, matplotlib, seaborn, requests. - Make sure to do this tutorial in the IPython notebook so that you get - the inline plots and easy documentation lookup. + Make sure to do this tutorial in a Jupyter notebook so that you get + the inline plots and easy documentation lookup. The shell command to open + one is simply `jupyter notebook`, then click New -> Python. """ # 1. Data acquisition ==== -- cgit v1.2.3 From 3743c596d8a6519d21e11dfa161aed4a92a742bc Mon Sep 17 00:00:00 2001 From: Thales Mello Date: Thu, 8 Mar 2018 18:03:32 -0300 Subject: Improvements to syntax and comments Improve code by using context managers to handle closing of files. Also, replace the flame-war indexing comment with one that explains why things are as they are. 
--- pythonstatcomp.html.markdown | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'pythonstatcomp.html.markdown') diff --git a/pythonstatcomp.html.markdown b/pythonstatcomp.html.markdown index 6dde1cf0..0e6f1f87 100644 --- a/pythonstatcomp.html.markdown +++ b/pythonstatcomp.html.markdown @@ -38,18 +38,16 @@ r.text # raw page source print(r.text) # prettily formatted # save the page source in a file: os.getcwd() # check what's the working directory -f = open("learnxinyminutes.html", "wb") -f.write(r.text.encode("UTF-8")) -f.close() +with open("learnxinyminutes.html", "wb") as f: + f.write(r.text.encode("UTF-8")) # downloading a csv fp = "https://raw.githubusercontent.com/adambard/learnxinyminutes-docs/master/" fn = "pets.csv" r = requests.get(fp + fn) print(r.text) -f = open(fn, "wb") -f.write(r.text.encode("UTF-8")) -f.close() +with open(fn, "wb") as f: + f.write(r.text.encode("UTF-8")) """ for more on the requests module, including APIs, see http://docs.python-requests.org/en/latest/user/quickstart/ @@ -71,8 +69,8 @@ pets # 1 vesuvius 6 23 fish # 2 rex 5 34 dog -""" R users: note that Python, like most normal programming languages, starts - indexing from 0. R is the unusual one for starting from 1. +""" R users: note that Python, like most C-influenced programming languages, starts + indexing from 0. R starts indexing at 1 due to Fortran influnce. 
""" # two different ways to print out a column -- cgit v1.2.3 From bb18dc81548a68b25227306bead977d55da23c66 Mon Sep 17 00:00:00 2001 From: Thales Mello Date: Fri, 9 Mar 2018 14:29:43 -0300 Subject: Update pythonstatcomp.html.markdown --- pythonstatcomp.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pythonstatcomp.html.markdown') diff --git a/pythonstatcomp.html.markdown b/pythonstatcomp.html.markdown index 0e6f1f87..082c7025 100644 --- a/pythonstatcomp.html.markdown +++ b/pythonstatcomp.html.markdown @@ -70,7 +70,7 @@ pets # 2 rex 5 34 dog """ R users: note that Python, like most C-influenced programming languages, starts - indexing from 0. R starts indexing at 1 due to Fortran influnce. + indexing from 0. R starts indexing at 1 due to Fortran influence. """ # two different ways to print out a column -- cgit v1.2.3 From 3f02799903c104631ccf778dabb30ba8b116b7bc Mon Sep 17 00:00:00 2001 From: Ben Quigley Date: Sun, 8 Apr 2018 13:44:35 -0400 Subject: Removed semicolons No semicolons needed in Python --- pythonstatcomp.html.markdown | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'pythonstatcomp.html.markdown') diff --git a/pythonstatcomp.html.markdown b/pythonstatcomp.html.markdown index 6dde1cf0..66eeb7ad 100644 --- a/pythonstatcomp.html.markdown +++ b/pythonstatcomp.html.markdown @@ -205,7 +205,7 @@ hre["DeathY"] = extractYear(hre.Death) hre["EstAge"] = hre.DeathY.astype(int) - hre.BirthY.astype(int) # simple scatterplot, no trend line, color represents dynasty -sns.lmplot("BirthY", "EstAge", data=hre, hue="Dynasty", fit_reg=False); +sns.lmplot("BirthY", "EstAge", data=hre, hue="Dynasty", fit_reg=False) # use scipy to run a linear regression from scipy import stats @@ -222,7 +222,7 @@ rval**2 # 0.020363950027333586 pval # 0.34971812581498452 # use seaborn to make a scatterplot and plot the linear regression trend line -sns.lmplot("BirthY", "EstAge", data=hre); +sns.lmplot("BirthY", "EstAge", data=hre) """ For 
more information on seaborn, see - http://web.stanford.edu/~mwaskom/software/seaborn/ -- cgit v1.2.3 From cb12952d60909b0f68416190d3cc8e69b81085c8 Mon Sep 17 00:00:00 2001 From: Timothy Ubbens Date: Mon, 15 Oct 2018 23:22:14 -0400 Subject: Add hre.csv to this repo directly --- pythonstatcomp.html.markdown | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) (limited to 'pythonstatcomp.html.markdown') diff --git a/pythonstatcomp.html.markdown b/pythonstatcomp.html.markdown index 2440d859..5444769a 100644 --- a/pythonstatcomp.html.markdown +++ b/pythonstatcomp.html.markdown @@ -146,7 +146,7 @@ ggplot(aes(x="age",y="weight"), data=pets) + geom_point() + labs(title="pets") """ # load some data on Holy Roman Emperors -url = "https://raw.githubusercontent.com/e99n09/R-notes/master/data/hre.csv" +url = "https://raw.githubusercontent.com/adambard/learnxinyminutes-docs/master/hre.csv" r = requests.get(url) fp = "hre.csv" with open(fp, "wb") as f: @@ -156,26 +156,19 @@ hre = pd.read_csv(fp) hre.head() """ - Ix Dynasty Name Birth Death Election 1 -0 NaN Carolingian Charles I 2 April 742 28 January 814 NaN -1 NaN Carolingian Louis I 778 20 June 840 NaN -2 NaN Carolingian Lothair I 795 29 September 855 NaN -3 NaN Carolingian Louis II 825 12 August 875 NaN -4 NaN Carolingian Charles II 13 June 823 6 October 877 NaN - - Election 2 Coronation 1 Coronation 2 Ceased to be Emperor -0 NaN 25 December 800 NaN 28 January 814 -1 NaN 11 September 813 5 October 816 20 June 840 -2 NaN 5 April 823 NaN 29 September 855 -3 NaN Easter 850 18 May 872 12 August 875 -4 NaN 29 December 875 NaN 6 October 877 - - Descent from whom 1 Descent how 1 Descent from whom 2 Descent how 2 -0 NaN NaN NaN NaN -1 Charles I son NaN NaN -2 Louis I son NaN NaN -3 Lothair I son NaN NaN -4 Louis I son NaN NaN + Ix Dynasty Name Birth Death +0 NaN Carolingian Charles I 2 April 742 28 January 814 +1 NaN Carolingian Louis I 778 20 June 840 +2 NaN Carolingian Lothair I 795 29 
September 855 +3 NaN Carolingian Louis II 825 12 August 875 +4 NaN Carolingian Charles II 13 June 823 6 October 877 + + Coronation 1 Coronation 2 Ceased to be Emperor +0 25 December 800 NaN 28 January 814 +1 11 September 813 5 October 816 20 June 840 +2 5 April 823 NaN 29 September 855 +3 Easter 850 18 May 872 12 August 875 +4 29 December 875 NaN 6 October 877 """ # clean the Birth and Death columns -- cgit v1.2.3 From a5b7fe9e618c52399cf7bea539cafde7515a7b55 Mon Sep 17 00:00:00 2001 From: Timothy Ubbens Date: Mon, 15 Oct 2018 23:22:34 -0400 Subject: Add import to resolve changes in new versions of python --- pythonstatcomp.html.markdown | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pythonstatcomp.html.markdown') diff --git a/pythonstatcomp.html.markdown b/pythonstatcomp.html.markdown index 5444769a..4cff3535 100644 --- a/pythonstatcomp.html.markdown +++ b/pythonstatcomp.html.markdown @@ -186,6 +186,8 @@ rx = re.compile(r'\d+$') # match trailing digits - http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html """ +from functools import reduce + def extractYear(v): return(pd.Series(reduce(lambda x, y: x + y, map(rx.findall, v), [])).astype(int)) -- cgit v1.2.3