import autograd.numpy as np
from autograd import grad, jacobian
f = loglikelihood  # placeholder: `loglikelihood` is not defined in this snippet
df = grad(f) # Fisher's score
d2f = jacobian(df) # Hessian
# Reference: https://github.com/HIPS/autograd/blob/master/docs/tutorial.md
# Beautiful Soup: pulling data out of HTML and XML files.
# Docs: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
from bs4 import BeautifulSoup

# `page` is assumed to be a response object exposing `.content` -- TODO confirm.
soup = BeautifulSoup(page.content, 'html.parser')
soup.children
soup.find('p') # returns first instance
# Signature sketch -- pass only the filters you need.  `class` is a reserved
# word in Python, so the CSS-class filter is spelled `class_`; the tag name
# filter is `name`.
soup.find_all(name=None, class_=None, id=None)
elem.get_text()
# searching using CSS selectors
soup.select("div p")
tag['attribute']

from bokeh.plotting import figure, show
from bokeh.plotting import save  # `save` is called below but was never imported

# data
x = [1, 2, 3, 4, 5]
y1 = [6, 7, 2, 4, 5]
y2 = [2, 3, 4, 5, 6]
y3 = [4, 5, 5, 7, 2]

# create a new plot
p = figure(title="Example", x_axis_label="x", y_axis_label="y")

# add renderers
p.line(x, y1, legend_label="Temp.", color="blue", line_width=2)
p.vbar(x=x, top=y2, legend_label="Rate", width=0.5, bottom=0, color="red")
circle = p.scatter(
    x,
    y3,
    marker="circle",
    size=80,
    legend_label="Objects",
    fill_color="red",
    fill_alpha=0.5,
    line_color="blue",
)

# change object's glyph (properties can be edited after the renderer is added)
glyph = circle.glyph
glyph.fill_color = "blue"

# toolbar
p.toolbar.autohide = True
p.toolbar.logo = None

# view
show(p)
save(p)

import openpyxl
# `file` is a placeholder for the workbook path.
wb = openpyxl.load_workbook(file, read_only=True, data_only=True)
ws = wb["sheet_name"]
val = ws.cell(1, 1).value
# With values_only=True, iter_rows yields one tuple of values per row, so `i`
# indexes a column within the selected range.  Replace each `...` with real bounds.
[row[i] for row in ws.iter_rows(min_row=..., max_row=..., min_col=..., max_col=..., values_only=True)]

# Guess a file's encoding from its first 10,000 bytes.
import chardet  # used below but was never imported

with open("path.file", 'rb') as f:
    result = chardet.detect(f.read(10000))
print(result)

import fuzzywuzzy
# Importing the package alone does not load its submodules, so bring in
# `fuzz` and `process` explicitly before using them as attributes.
import fuzzywuzzy.fuzz
import fuzzywuzzy.process

# `countries` is a placeholder for the collection of candidate strings.
matches = fuzzywuzzy.process.extract(
    "south korea", countries, limit=10, scorer=fuzzywuzzy.fuzz.token_sort_ratio)

# Hypothesis quickstart: https://hypothesis.readthedocs.io/en/latest/quickstart.html
from hypothesis import given, strategies as st
@given(st.floats(), st.floats())
def test_assoc(x, y):
    """Property check: multiplying two floats gives the same result in either order.

    Hypothesis falsifies this property; the counterexample it reports is
    recorded in the comment below.
    """
    assert x * y == y * x
# AssertionError:
# Falsifying example: test_assoc(
# x=0.0,
# y=inf,
# )
# (0.0 * inf is nan, and nan never compares equal to itself)

# Architecture
# draw_path: Draws compound polygons, made up of line and Bézier segments.
# draw_image: Draws raster images.
# draw_text: Draws text with the given font properties.
class SomeArtist(Artist):
    """An example Artist that implements the draw method"""

    def draw(self, renderer):
        """Call the appropriate renderer methods to paint self onto canvas"""
        if not self.get_visible():
            return
        # create some objects and use renderer to draw self here
        # (graphics_context, path and transform are placeholders)
        renderer.draw_path(graphics_context, path, transform)


# TODO: consider using
# https://matplotlib.org/3.5.1/api/ticker_api.html#matplotlib.ticker.FixedFormatter
millnames = ['', 'K', 'M', 'B']


def millify(n, decimals=0):
    """Format a number with a thousands suffix ('', 'K', 'M' or 'B').

    Args:
        n: Value to format; anything accepted by ``float()``.
        decimals: Number of digits shown after the decimal point.

    Returns:
        The scaled value with thousands separators followed by its suffix,
        e.g. ``millify(1234567, 1) == '1.2M'``.  Values beyond the billions
        keep the 'B' suffix; zero and non-finite values are formatted
        unscaled with no suffix.
    """
    import math  # local import: the file never imports `math` at module level

    n = float(n)
    if n == 0 or not math.isfinite(n):
        # log10(0) is undefined and floor() raises on inf/nan.
        return ('{:,.' + str(decimals) + 'f}').format(n)

    last = len(millnames) - 1
    millidx = max(0, min(last, int(math.floor(math.log10(abs(n)) / 3))))
    # Rounding can carry the mantissa up to 1000 (999,999 -> "1,000K");
    # promote to the next suffix so the result reads "1M" instead.
    if millidx < last and abs(round(n / 10 ** (3 * millidx), decimals)) >= 1000:
        millidx += 1
    return ('{:,.' + str(decimals) + 'f}{}').format(n / 10 ** (3 * millidx), millnames[millidx])
def format_yaxis(ax, decimals=0):
    """Label the y-axis ticks of *ax* with ``millify``-formatted values.

    Args:
        ax: Object whose ``get_yaxis()`` result accepts ``set_major_formatter``
            (presumably a matplotlib Axes -- confirm against callers).
        decimals: Forwarded to ``millify`` as the number of decimal places.
    """
    # Local import: the file binds `mtick` and `plt` but never the bare
    # `matplotlib` name this function refers to.
    import matplotlib.ticker

    # FuncFormatter calls the function with (tick value, tick position);
    # the position is unused.  Alternative without suffixes: format(int(x), ',')
    ax.get_yaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda x, p: millify(x, decimals)))
# set y-axis range
plt.ylim(bottom=0)  # pyplot interface
ax.set_ylim(bottom=0)  # Axes interface

plt.scatter(x, y)
ax.hist(data, bins=50, density=True, histtype="step")
# Subfigures:
import matplotlib.ticker as mtick

fig = plt.figure(constrained_layout=True)
fig.suptitle('Figure title')

# create 3x1 subfigs
subfigs = fig.subfigures(nrows=3, ncols=1)
for row, subfig in enumerate(subfigs):
    subfig.suptitle(f'Subfigure title {row}')

    # create 1x3 subplots per subfig
    axs = subfig.subplots(nrows=1, ncols=3, sharex='col', sharey='row')
    for col, ax in enumerate(axs):
        # scatter (not plot) is the call that takes per-point colors `c`
        # and marker sizes `s`
        points = ax.scatter(x, y, c=color_values, s=10)
        ax.set_title(f'Plot title {col}')
        ax.hist(x, density=True, histtype="step")
        ax.set_ylim((0, None))
        # gridlines
        ax.set_axisbelow(True) # ensure gridlines are in background
        ax.grid(visible=True, which='major', axis='both')
        # horizontal line at 0.0; use axvline for a vertical one
        ax.axhline(0.0, linestyle='--', color='gray')
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0)) # 1=100%
        # Axes has no colorbar(); the (sub)figure draws it for a mappable
        subfig.colorbar(points, ax=ax)

# fig_path and name are placeholders for an output directory (Path) and file stem
fig.savefig(fig_path.joinpath(f"{name}.pdf"), bbox_inches='tight')

# Colors
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
# In a notebook, run the IPython magic below first to make the figure
# interactive.  It is kept commented out because it is not valid Python, and
# the explanation sits on its own line so it is not passed to the magic as an
# argument.
# %matplotlib qt
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot3D(xline, yline, zline, 'gray')
ax.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Greens')
ax.contour3D(X, Y, Z, 50, cmap='binary')
ax.plot_wireframe(X, Y, Z, color='black')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.view_init(60, 35)

# matplotlib.pyplot.imread
# Returns the image data as an array whose shape is:
#   (M, N) for grayscale images,
#   (M, N, 3) for RGB images,
#   (M, N, 4) for RGBA images.
# plotnine: implements the grammar of graphics; the API is mostly the same as ggplot2 in R, and it renders with matplotlib.
from plotnine import *
(ggplot(data=<DATA>) +
<GEOM_FUNCTION>(
mapping=aes(<MAPPINGS>),
stat=<STAT>,
position=<POSITION>
) +
<COORDINATE_FUNCTION> +
<FACET_FUNCTION> +
labs(x, y, title, color) +
theme() +
scale_x_log10() +
scale_y_log10()
)
(ggplot(date) +
geom_point() +
facet_wrap("var", nrow=2)
)Aesthetics are a visual property of the objects in your plot.
# Geometric objects are used to represent the data:
#   geom_point(mapping=aes(x=_, y=_, color=_, size=_, alpha=_, shape=_))
#   geom_smooth(mapping=aes(x, y, linetype, group, color))
#   geom_bar(mapping=aes(..., fill), stat=)
#   geom_boxplot()
#   geom_abline()
#   geom_text()
#   geom_label()
#   geom_segment()
# Statistics are used to summarise the data.
# Facets are used to create subplots.
# Requests quickstart: https://requests.readthedocs.io/en/latest/user/quickstart/
import requests

api = r"https://localhost:3000"
# `url` is a placeholder for the query value.
r = requests.get(api, params={"q" : url})
r = requests.post(api, data = {'key':'value'})

# inspecting the response
r.json()
r.headers # a dict
r.headers["Content-Type"]
r.status_code == 404
r.content
r.ok
r.url
r.text
r.encoding

# Encode categorical data. Can use OneHotEncoder too.
from sklearn.preprocessing import OrdinalEncoder

# Make copy to avoid changing original data
label_X_train = X_train.copy()
label_X_valid = X_valid.copy()

# Apply ordinal encoder to each column with categorical data:
# fit on the training columns, then reuse the fitted encoder on validation.
encoder = OrdinalEncoder()
label_X_train[object_cols] = encoder.fit_transform(X_train[object_cols])
label_X_valid[object_cols] = encoder.transform(X_valid[object_cols])

from sklearn.impute import SimpleImputer
import pandas as pd  # used below but was never imported

imputer = SimpleImputer(strategy='constant') # for num; for cat use 'most_frequent'
# The imputer hands back bare arrays, so rebuild the frames with both the
# column names and the row index of the originals (a fresh default index
# would no longer line up with the target values).
imputed_X_train = pd.DataFrame(imputer.fit_transform(X_train),
                               columns=X_train.columns, index=X_train.index)
imputed_X_test = pd.DataFrame(imputer.transform(X_test),
                              columns=X_test.columns, index=X_test.index)

from sklearn.metrics import mean_absolute_error

mean_absolute_error(y_test, y_fit)

# Pipelines, useful for cleaning up and productionizing.
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# Every `...` below is a placeholder for a real transformer or estimator.
num_transformer = ...
cat_transformer = Pipeline(steps=[('imputer', ...), ('onehot', ...)])

# Send numerical and categorical columns through their own transformer.
preprocessor = ColumnTransformer(transformers=[
    ('num', num_transformer, numerical_cols),
    ('cat', cat_transformer, categorical_cols),
])

model = ...

# Chain preprocessing and the model so both are fitted and applied together.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model),
])
pipeline.fit(X_train, y_train)
preds = pipeline.predict(X_valid)