Table of Contents
1 04 - Using numba to release the GIL
1.0.1 Timing python code
1.0.2 Now try this with numba
1.0.3 Make two identical functions: one that releases and one that holds the GIL
1.0.4 now time wait_loop_withgil
1.0.5 not bad, but we’re only using one core
pip install contexttimer
conda install numba
conda install joblib
[1]:
from IPython.display import Image
import contexttimer
import time
import math
from numba import jit
from joblib import Parallel
import logging
One easy way to tell whether you are utilizing multiple cores is to compare the wall-clock time, measured by time.perf_counter, against the total CPU time used by all threads, measured with time.process_time. If the CPU time is noticeably larger than the wall-clock time, more than one core was busy.
I’ll organize these two timers using the contexttimer module.
To install, in a shell window type:
pip install contexttimer
[2]:
def wait_loop(n):
    """
    Function under test: burn CPU time in four nested loops.

    Parameters
    ----------
    n: int
        Upper bound of the outermost loop.  The amount of work grows
        roughly like n**4.

    Returns
    -------
    out: float
        ``math.sqrt(i)**2`` from the last inner iteration that ran, or
        0.0 when n < 4 and the innermost loop body never executes.
    """
    # Start from a defined value: for n < 4 the innermost body is never
    # reached, and returning an unassigned local would raise
    # UnboundLocalError.
    out = 0.0
    for m in range(n):
        for k in range(m):
            for j in range(k):
                for i in range(j):
                    # throw-away arithmetic; the point is to keep the CPU busy
                    i = i + 4
                    out = math.sqrt(i)
                    out = out**2.
    return out
[3]:
# Run the pure-python loop once while two timers are active: one built
# on time.perf_counter (wall clock) and one on time.process_time (cpu).
nloops = 200
with contexttimer.Timer(time.perf_counter) as pure_wall, \
        contexttimer.Timer(time.process_time) as pure_cpu:
    result = wait_loop(nloops)
print(f'pure python wall time {pure_wall.elapsed} and cpu time {pure_cpu.elapsed}')
pure python wall time 12.900637587998062 and cpu time 12.683904
Numba is a just-in-time compiler that can turn a subset of Python into machine code using the LLVM compiler.
Reference: Numba documentation
[4]:
@jit('float64(int64)', nopython=True, nogil=True)
def wait_loop_nogil(n):
    """
    Function under test, compiled by numba with nogil=True.

    Same nested-loop workload as the GIL-holding variant; the only
    difference is the ``nogil`` flag passed to ``jit``.

    Parameters
    ----------
    n: int
        Upper bound of the outermost loop.

    Returns
    -------
    out: float
        ``math.sqrt(i)**2`` from the last inner iteration that ran, or
        0.0 when n < 4 and the innermost loop body never executes.
    """
    # Give the result a defined float value up front so the return is
    # well defined even when no inner iteration runs.
    out = 0.0
    for m in range(n):
        for k in range(m):
            for j in range(k):
                for i in range(j):
                    # throw-away arithmetic; the point is to keep the CPU busy
                    i = i + 4
                    out = math.sqrt(i)
                    out = out**2.
    return out
[5]:
@jit('float64(int64)', nopython=True, nogil=False)
def wait_loop_withgil(n):
    """
    Function under test, compiled by numba with nogil=False.

    Same nested-loop workload as the GIL-releasing variant; the only
    difference is the ``nogil`` flag passed to ``jit``.

    Parameters
    ----------
    n: int
        Upper bound of the outermost loop.

    Returns
    -------
    out: float
        ``math.sqrt(i)**2`` from the last inner iteration that ran, or
        0.0 when n < 4 and the innermost loop body never executes.
    """
    # Give the result a defined float value up front so the return is
    # well defined even when no inner iteration runs.
    out = 0.0
    for m in range(n):
        for k in range(m):
            for j in range(k):
                for i in range(j):
                    # throw-away arithmetic; the point is to keep the CPU busy
                    i = i + 4
                    out = math.sqrt(i)
                    out = out**2.
    return out
[6]:
# Time the numba-compiled, GIL-holding version with the same pair of
# timers used for the pure-python run (wall clock and cpu time).
nloops = 500
with contexttimer.Timer(time.perf_counter) as numba_wall:
    with contexttimer.Timer(time.process_time) as numba_cpu:
        result = wait_loop_withgil(nloops)
print(f'numba wall time {numba_wall.elapsed} and cpu time {numba_cpu.elapsed}')
# A speed-up factor is the ratio old_time / new_time.  The previous
# expression, (old - new) / new, is that ratio minus one.
# NOTE(review): pure_wall was measured with nloops=200 while this run
# uses nloops=500, so the ratio still understates the speed-up for
# equal work -- rerun both with the same nloops for a fair comparison.
print(f"numba speed-up factor {pure_wall.elapsed / numba_wall.elapsed}")
numba wall time 0.05427086600684561 and cpu time 0.051916000000000295
numba speed-up factor 236.70834219543877