# Save this file as ~/.ipython/profile_default/startup/ipython_init.py
# so that IPython executes it at the start of every interactive session.

import os
import pkgutil
import re
import sqlite3
import sys
from contextlib import closing

import pandas
from pandas import *
import numpy as np
from numpy.random import *

# The star imports above are deliberate (interactive convenience), but a few
# of the names they may bring in get in the way:
#   debug - exported by old pandas releases; masks IPython's %debug magic
#   test  - pandas' test-suite runner; pytest collects it as a test case
#           whenever this namespace is star-imported
# pop() is used instead of `del` so that a pandas release which does not
# export one of these names no longer raises NameError at startup.
for _unwanted in ("debug", "test"):
    globals().pop(_unwanted, None)
del _unwanted


# --------------------------------------------------------------
def remask(ser, regex, flags=re.I):
    """Build a boolean mask by regex-searching every element of a Series.

    Each element is converted with ``str()`` before searching, so numbers
    and missing values are matched against their text form.

    Args:
        ser: a pandas Series (or DataFrame column).
        regex: regular expression passed to ``re.search``.
        flags: ``re`` flags; case-insensitive by default. Pass ``flags=0``
            for a case-sensitive match.

    Returns:
        A boolean Series aligned with ``ser``.

    Example:
        mm = bag.member
        mask = remask(mm.billing_name, r'ebay')
        mm[['id', 'billing_name']][mask]
        # case-sensitive variant:
        mask = remask(mm.billing_name, r'eBay', flags=0)
    """
    # Compile once instead of re-resolving the pattern for every element.
    pattern = re.compile(regex, flags)
    return ser.map(lambda x: bool(pattern.search(str(x))))


# --------------------------------------------------------------
def ddd():
    """Return a small mixed-type DataFrame that is handy for quick tests.

    Columns: ``id`` (with a NaN), ``i1``/``i2`` ints, ``f1``/``f2``/``f3``
    floats (``f2`` and ``f3`` contain a NaN), ``bb`` bools, ``ss`` strings
    (with a NaN), plus ``xx``/``yy`` for linear-regression experiments
    where ``yy = 50 * xx + 60 + standard normal noise``.
    """
    aa = DataFrame({
        'id': [0, 1, 2, 3, 4, 5, np.nan],
        'i1': [6, 5, 4, 3, 2, 1, 0],
        'i2': [6, 5, 4, 4, 1, 1, 0],
        'f1': [0.0, 1.01, 2.002, 3.0003, 4.00004, 5.000005, 6.0000006],
        # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0
        'f2': [0.0, 1.0, 2.0, np.nan, 4.0, 5.0, 6.0],
        'f3': [0.0, 1.0, 1.0, np.nan, 2.0, 2.0, 6.0],
        'bb': [True, False, True, False, True, False, True],
        'ss': ['s0', 's1', 's2', 's3', np.nan, 's5', 's6'],
    })
    # add two columns to test linear regression
    aa['xx'] = aa.index
    # np.random.randn is referenced explicitly so this function does not
    # depend on the star import at the top of the file.
    aa['yy'] = aa['xx'] * 50 + 60 + np.random.randn(len(aa))
    return aa[['id', 'i1', 'i2', 'f1', 'f2', 'f3', 'bb', 'ss', 'xx', 'yy']]


# --------------------------------------------------------------
def ddd2():
    """Return a small DataFrame for quick stack/unstack/pivot tests.

    Example:
        xx = ddd2()
        xx.set_index(["Month", "name"]).stack().unstack('Month')
    """
    rows = [
        ["Jan", "name1", 1, 2, 3],
        ["Jan", "name2", 4, 5, 6],
        ["Mar", "name1", 11, 12, 13],
        ["Mar", "name2", 14, 15, 16],
    ]
    return pandas.DataFrame(rows, columns=["Month", "name", "c1", "c2", "c3"])


# --------------------------------------------------------------
def ddd3():
    """Return a small long-format DataFrame for quick pivot tests.

    Example (keyword arguments are used because pandas >= 2.0 no longer
    accepts them positionally):
        xx = ddd3()
        xx.pivot(index='foo', columns='bar', values='baz')
        #      A  B  C
        # one  1  2  3
        # two  4  5  6

        xx.pivot(index='foo', columns='bar')['baz']
        #      A  B  C
        # one  1  2  3
        # two  4  5  6
    """
    aa = DataFrame({
        'foo': 3 * ['one'] + 3 * ['two'],
        'bar': 2 * ['A', 'B', 'C'],
        'baz': [1, 2, 3, 4, 5, 6],
    })
    return aa[['foo', 'bar', 'baz']]


# --------------------------------------------------------------
def mygrep(df, ss):
    """Grep the text rendering of a DataFrame or Series.

    The object is rendered with ``to_string()`` and the rendered lines that
    match the regular expression ``ss`` are returned as a list.

    Example:
        /mygrep DF r'some regex'
        (the leading "/" tells IPython that this is a function call)
    """
    lines = df.to_string().split("\n")
    return [line for line in lines if re.search(ss, line)]


# --------------------------------------------------------------
def mymodules():
    """Return the sorted names of all modules reported by pkgutil.iter_modules()."""
    return sorted(m[1] for m in pkgutil.iter_modules())


# --------------------------------------------------------------
def _history_lines(db_path):
    """Return every input line stored in the IPython history database at db_path."""
    # sqlite3.connect() would silently create an empty database for a
    # missing path, so fail early with a clear message instead.
    if not os.path.isfile(db_path):
        raise IOError("IPython history database not found: %s" % db_path)
    with closing(sqlite3.connect(db_path)) as conn:
        rows = conn.execute(
            "select source_raw from history order by session, line"
        ).fetchall()
    lines = []
    for (source,) in rows:
        # A single history entry may span several lines (multi-line cells).
        lines.extend((source or "").split("\n"))
    return lines


def myhist(N=30, regex='', pr=True, db_path=None):
    """Search the IPython input history across multiple sessions.

    Args:
        N: maximum number of (most recent) lines to show; ``N <= 0`` yields
            nothing.
        regex: optional case-insensitive regular expression; when given,
            only matching lines are kept (and then the last ``N`` of them).
        pr: when True (default) the lines are printed and None is returned;
            when False the list of lines is returned instead.
        db_path: path of the history database; defaults to
            ``~/.ipython/profile_default/history.sqlite``.

    Raises:
        IOError: if the history database file does not exist.

    Examples:
        myhist(200)
        myhist(1000, 'myword')
        aa = myhist(10000, 'cpx', pr=False)

    Note: an alternative approach is to create a magic function, see
    http://ipython.org/ipython-doc/stable/interactive/reference.html
    You can also use IPython's history magic to output a range of
    sessions, e.g.  ~2000/1-~0/2000
    """
    if db_path is None:
        db_path = '~/.ipython/profile_default/history.sqlite'
    # The database is read with the stdlib sqlite3 module rather than by
    # shelling out to /usr/bin/sqlite3: no external binary is needed and the
    # result is text on both Python 2 and Python 3.
    lines = _history_lines(os.path.expanduser(db_path))

    if regex:
        pattern = re.compile(regex, re.I)
        lines = [line for line in lines if pattern.search(line)]

    # lines[-0:] would be the whole list, so non-positive N is handled apart.
    selected = lines[-N:] if N > 0 else []

    if not pr:
        return selected
    for line in selected:
        print(line)