Spreadsheets

21 Jun 2016

# Let's talk about spreadsheets. Spreadsheets are businessy.
# A spreadsheet is depicted as a two-dimensional grid of cells.
# A simple model of that grid in python might be a list of lists.
sheet = [
  [0, 0, 0, 0, 0, 0],
  [0, 0, 7, 0, 0, 0],
  [0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 0, 0]
]

# Then we could print that lonely 7 using:
print(sheet[1][2])

# but there's a symmetry between rows and columns of this simple model,
# which isn't present in many big businessy spreadsheets.
# The symmetry is great for tiles in 2d maps or pixels in images, but
# in big businessy spreadsheets, each column has a meaningful name,
# (or it could have a name, if anyone bothered to rename it).
#
# We could model that in python as a list of dictionaries.
inventory_sheet = [
  { 'item': "safety pin", 'where': "user's inventory", 'quantity': 3 },
  { 'item': "pipe wrench", 'where': "the library", 'quantity': 1 },
  { 'item': "keychain", 'where': "kobold40's inventory", 'quantity': 1 },
  { 'item': "rubber stamp", 'where': "the desk drawer", 'quantity': 1 },
]

# Let's try writing some code that works with lists-of-dictionaries.

# Print out everything that is lonely.
for row in inventory_sheet:
  if row['quantity'] == 1:
    print row['item']

# Scrape the inventory sheet to start a new sheet containing all the rooms.
room_sheet = []
for row in inventory_sheet:
  room_sheet.append({ 'name': row['where'] })

# A helpful thing would be if the row number was also a field inside the row.
# We could add a field via typing 'id': 0, .. 1 ... 2 ... 3 but that sounds
# tedious. Let's make the computer do it.
# We can write a function that ADDS id fields to a sheet:
def add_ids(sheet):
  for row_number in range(len(sheet)):
    sheet[row_number]['id'] = row_number

add_ids(inventory_sheet)

# So now each row "knows" what its position is in the sheet.
print(inventory_sheet)

# If we had translations off in their own sheet,
# then we could separate the tasks of adding features to the businessy thing,
# and only later adding lines to the translations sheet.
#
# This is sometimes called "Single Responsibility Principle",
# but another way to say it is "One Reason to Change".
# The items sheet grows when a new item is added.
# The translations sheet grows when a new translation is added.
# These changes can interleave without care.

translations_sheet = [
    { 'language': 'en', 'inventory_id': 0, 'translation': 'safety pin' },
    { 'language': 'en', 'inventory_id': 1, 'translation': 'pipe wrench' },
    { 'language': 'en', 'inventory_id': 2, 'translation': 'keychain' },
    { 'language': 'en', 'inventory_id': 3, 'translation': 'rubber stamp' },
    { 'language': 'jp', 'inventory_id': 0, 'translation': 'anzenpin' },
    { 'language': 'jp', 'inventory_id': 1, 'translation': 'renchi' },
    # TODO: translate keychain into japanese
    { 'language': 'jp', 'inventory_id': 3, 'translation': 'gomuin' },
]
add_ids(inventory_sheet)

# We might have a function to print a translated item name,
# which searches for a translation,
# defaulting back to using the value of the 'item' column.
def translate_item(language, inv_row):
  for tr in translations_sheet:
    if tr['language'] == language and tr['inventory_id'] == inv_row['id']:
      # found it
      return tr['translation']
  # not found, default to whatever we have in the item field of the inventory
  return inv_row['item']

# Now we can print out all the lonely items in japanese
for inv_row in inventory_sheet:
  if inv_row['quantity'] == 1:
    print(translate_item('jp', inv_row))

# We could have finished the "print out all the lonely items in japanese",
# using two nested for loops, but using a function helped comprehensibility.
# (Not that it's particularly great at being comprensible.)
#
# Nested loops are good at dealing with multiple related/connected sheets
# (whether you break them out into separate functions or not).
#
# Let's look at a small businessy spreadsheet with three pages / sheets to it.
#

suppliers = [
  { 'name': 'Smith', 'status': 20, 'city': 'London' },
  { 'name': 'Jones', 'status': 10, 'city': 'Paris' },
  { 'name': 'Blake', 'status': 30, 'city': 'Paris' },
  { 'name': 'Clark', 'status': 20, 'city': 'London' },
  { 'name': 'Adams', 'status': 30, 'city': 'Athens' },
]
add_ids(suppliers)

parts = [
  { 'name': 'Nut', 'color': 'Red', 'weight': 12.0, 'city': 'London' },
  { 'name': 'Bolt', 'color': 'Green', 'weight': 17.0, 'city': 'Paris' },
  { 'name': 'Screw', 'color': 'Blue', 'weight': 17.0, 'city': 'Oslo' },
  { 'name': 'Screw', 'color': 'Red', 'weight': 14.0, 'city': 'London' },
  { 'name': 'Cam', 'color': 'Blue', 'weight': 12.0, 'city': 'Paris' },
  { 'name': 'Cog', 'color': 'Red', 'weight': 19.0, 'city': 'London' },
]
add_ids(parts)

shipments = [
  { 'snum': 0, 'pnum': 0, 'quantity': 300 },
  { 'snum': 0, 'pnum': 1, 'quantity': 200 },
  { 'snum': 0, 'pnum': 2, 'quantity': 400 },
  { 'snum': 0, 'pnum': 3, 'quantity': 200 },
  { 'snum': 0, 'pnum': 4, 'quantity': 100 },
  { 'snum': 0, 'pnum': 5, 'quantity': 100 },
  { 'snum': 1, 'pnum': 0, 'quantity': 300 },
  { 'snum': 1, 'pnum': 1, 'quantity': 400 },
  { 'snum': 2, 'pnum': 1, 'quantity': 200 },
  { 'snum': 3, 'pnum': 1, 'quantity': 200 },
  { 'snum': 3, 'pnum': 3, 'quantity': 300 },
  { 'snum': 3, 'pnum': 4, 'quantity': 400 },
]
add_ids(shipments)

# So now we can write examples of those nested for loops all day:

# print shipments of green parts,
# outer loop over parts,
# inner loop over shipments
for part in parts:
  if part['color'] == 'Green':
    for shipment in shipments:
      if shipment['pnum'] == part['id']:
        print("Supplier #%d shipped %d of part #%d." %
              (shipment['snum'], shipment['quantity'], part['id']))

# print shipments of green parts,
# outer loop over shipments,
# inner loop over parts
for shipment in shipments:
  goal_part_num = shipment['pnum']
  for part in parts:
    if goal_part_num == part['id'] and part['color'] == 'Green':
        print("Supplier #%d shipped %d of part #%d." %
              (shipment['snum'], shipment['quantity'], part['id']))

# These examples are linear searches, and for larger things linear searches
# may be impractically inefficient; but let's ignore that for now.
#
# But nested loops over related tables keep coming up over and over again,
# and whenever there is a pattern, programmers try to figure out the
# essence of that pattern.
#
# So the idea out there is "joining" a table to another table.
#
# In the inventory-and-translations example, this would be like
# "baking in" the translations, creating one table that has
# one row for every item/language pair.
# There's good reasons NOT to actually do that ("Change for One Reason"),
# but we can imagine doing that.
#
# In the shipments-and-parts example, we could imagine adding a column,
# color, to the shipments table, and just repeating the fact that
# part #1 is green every time we do a shipment of part #1.
#
# So this is a first, rough cut at a joiner function.
def join(table1, table2, table1_col, table2_col):
  accumulator = []
  for row1 in table1:
    for row2 in table2:
      if row1[table1_col] == row2[table2_col]:
        accumulator.append((row1, row2))
  return accumulator

# It takes two tables, and two column names.
# It does two nested loops over the tables.
# It finds and returns pairs of rows that are related
# in that they have one particular column equal.

translated = join(inventory_sheet, translations_sheet, 'id', 'inventory_id')
print translated

# Unhappily, we still have to write another for loop over the returned table.
# Is there no end to for loops?
# It's technically not a nested for loop but still...

for translated_item in translated:
  inv_row = translated_item[0]
  translations_row = translated_item[1]
  if inv_row['quantity'] == 1 and translations_row['language'] == 'jp':
    print translations_row['translation']

# Why did we build and return this accumulator thing,
# if we're just going to have to loop over it AGAIN?
#
# There's a way forward, which is to add a callback to join.
#
# The name "callback" comes from the "hollywood principle":
# "don't call us, we'll call you".
#
# We define a function, and then instead of calling that function
# directly, we PASS it as an argument to another function,
# which calls "back".
def join(table1, table2, table1_col, table2_col, callback):
  for row1 in table1:
    for row2 in table2:
      if row1[table1_col] == row2[table2_col]:
        callback(row1, row2)

# print shipments of green parts,
# outer loop over parts,
# inner loop over shipments
def print_if_green(part, shipment):
  if part['color'] == 'Green':
    print("Supplier #%d shipped %d of part #%d." %
          (shipment['snum'], shipment['quantity'], part['id']))

join(parts, shipments, 'id', 'pnum', print_if_green)

# print shipments of green parts,
# outer loop over shipments,
# inner loop over parts
def print_if_green2(shipment, part):
  if part['color'] == 'Green':
    print("Supplier #%d shipped %d of part #%d." %
          (shipment['snum'], shipment['quantity'], part['id']))

join(shipments, parts, 'pnum', 'id', print_if_green2)

# There are several whole languages specializing in this kind of thing,
# and in SQL, this would look something like this:
#
# SELECT shipments.snum, shipments.quantity, parts.id
# FROM shipments JOIN parts
# WHERE part.color == "Green"
#