#!/usr/bin/env python

import sys
import struct
import StringIO
import table
import charstrings

class Header(table.Table):
	"""Leading header record of the font data.

	parse() reads this record first and then skips header_size - 4 further
	bytes, i.e. whatever part of the header lies beyond the four one-byte
	fields declared here.
	"""
	# (field name, format) pairs.  The formats look like struct codes ("B" is
	# one unsigned byte) -- presumably decoded by table.Table, which is not
	# visible in this file.
	definition = [
		("major_version", "B"),
		("minor_version", "B"),
		("header_size", "B"),
		("global_offset_size", "B"),
	]

class INDEX(table.Table):
	"""Counted array of variable-length objects.

	Stream layout as read here: a 16-bit big-endian count (the declared
	field), then -- only if count > 0 -- one byte giving the offset size,
	count + 1 offsets of that size, and finally the packed object data.
	Offsets are 1-based, so entry i occupies
	data[offsets[i] - 1 : offsets[i + 1] - 1].

	After reading, self.entries holds one item per object, in file order.
	If a subclass defines element_type, every item is parsed with
	element_type.read_from_stream(); otherwise the items are the raw chunks.
	"""
	definition = [
		("count", ">H"),
	]
	extra_field_names = ["offset_size", "offsets", "entries"]

	def read_remainder_from_stream(self, stream):
		"""Read the offset array and the object data that follow the count.

		Raises KeyError if the offset size is not 1, 2, 3 or 4 and
		struct.error if the offset array is truncated.
		"""
		self.offsets = []
		self.data = None
		self.chunks = {}
		self._ordered_chunks = None
		if self.count > 0:
			offset_size = ord(stream.read(1))
			self.offset_size = offset_size
			raw_offsets = stream.read((self.count + 1) * offset_size)
			# struct has no 3-byte integer, so a 3-byte offset is unpacked as
			# (high byte, low 16 bits) and recombined by combine() below.
			offset_format = {
				1: "B",
				2: ">H",
				3: ">BH",
				4: ">I",
			}[offset_size]
			def combine(parts):
				if len(parts) == 1:
					return parts[0]
				return (parts[0] << 16) | parts[1]
			self.offsets = [
				combine(struct.unpack(offset_format, raw_offsets[start : start + offset_size]))
				for start in range(0, (self.count + 1) * offset_size, offset_size)
			]
			# The last offset points one past the end of the data (1-based).
			data_size = self.offsets[-1] - 1
			self.data = stream.read(data_size)
			self.decode_index()
		self.fixup_after_reading()

	def decode_index(self):
		"""Slice self.data into one chunk per entry, in file order.

		Chunks are cut between consecutive offsets by position rather than
		looked up by offset value: a zero-length entry has the same offset as
		the entry after it, so an offset-keyed lookup would hand the empty
		entry its successor's content.
		"""
		has_element_type = hasattr(self.__class__, "element_type")
		self.chunks = {}
		self._ordered_chunks = []
		for position in range(len(self.offsets) - 1):
			offset = self.offsets[position]
			end_offset = self.offsets[position + 1]
			chunk = self.data[offset - 1 : end_offset - 1]
			if has_element_type:
				#f = open("Q", "wb")
				#f.write(chunk)
				#f.close()
				chunk = self.__class__.element_type.read_from_stream(StringIO.StringIO(chunk))
			# Offset-keyed mapping kept for any existing users; where several
			# entries share an offset the last one wins, as before.
			self.chunks[offset] = chunk
			self._ordered_chunks.append(chunk)

	def fixup_after_reading(self):
		"""Publish the decoded chunks as self.entries (file order)."""
		ordered = getattr(self, "_ordered_chunks", None)
		if ordered is None:
			# decode_index() did not run (empty INDEX) or an override filled
			# only self.chunks: fall back to the offset-keyed mapping.
			ordered = [self.chunks[offset] for offset in self.offsets[: -1]]
		self.entries = list(ordered)

class Name_INDEX(INDEX):
	"""INDEX that parse() reads directly after the header.

	No element_type is defined, so the entries stay raw chunks.
	"""
	pass

def operator_P(input):
	"""Return True if the byte value `input` starts a DICT operator.

	DICT.read_remainder_from_stream() treats every leading byte below 22 as
	an operator and everything else as the start of an operand.
	"""
	operator_limit = 22
	return input < operator_limit

class DICT(table.Table):
	"""Key/value dictionary stored as operands followed by an operator.

	The stream is a sequence of operands terminated by the operator that
	consumes them.  Operators are single bytes below 22; the byte 12 is an
	escape that is followed by a second operator byte.

	After reading, self.entries is a list of (key_name, operands) tuples in
	stream order.
	"""
	# (key name, operator, operand types).  A two-element list as operator
	# means "escape byte 12 followed by the second byte".
	DICT_keys = [
		("version", 0, ["SID"]),
		("Notice", 1, ["SID"]),
		("Copyright", [12, 0], ["SID"]),
		("FullName", 2, ["SID"]),
		("FamilyName", 3, ["SID"]),
		("Weight", 4, ["SID"]),
		("isFixedPitch", [12, 1], ["boolean"]),
		("ItalicAngle", [12, 2], ["number"]),
		("UnderlinePosition", [12, 3], ["number"]),
		("UnderlineThickness", [12, 4], ["number"]),
		("PaintType", [12, 5], ["number"]),
		("CharStringType", [12, 6], ["number"]),
		("FontMatrix", [12, 7], ["array"]),
		("UniqueID",  13, ["number"]),
		("FontBBox",  5, ["array"]),
		("StrokeWidth", [12, 8], ["number"]),
		("XUID",  14, ["array"]),
		("Charset",  15, ["number"]), # , charset offset(0)"]),
		("Encoding",  16, ["number"]), # , encoding offset(0)"]),
		("CharStrings",  17, ["number"]), # , charstrings offset(0)"]),
		("Private",  18, ["number", "number"]), # Private DICT size and offset(0)"]),
		("SyntheticBase", [12, 20], ["number"]), # synthetic base font index"]),
		("PostScript",  [12, 21], ["SID"]), # code
		("BaseFontName",  [12, 22], ["SID"]),
		("BaseFontBlend",  [12, 23], ["delta"]),
		#-- for CIDFonts:
		("ROS",  [12, 30], ["SID", "SID", "number"]), # registry, ordering, supplement
		("CIDFontVersion",  [12, 31], ["number"]),
		("CIDFontRevision",  [12, 32], ["number"]),
		("CIDFontType",  [12, 33], ["number"]),
		("CIDCount", [12, 34], ["number"]),
		("UIDBase", [12, 35], ["number"]),
		("FontDictArray", [12, 36], ["number"]), #, Font DICT INDEX offset(0)"]),
		("FontDictSelect", [12, 37], ["number"]), # , Font DICT select offset(0)"]),
		("FontName", [12, 38], ["SID"]),
	]
	definition = []
	extra_field_names = ["entries"]
	def read_remainder_from_stream(self, stream):
		"""Read operand/operator pairs from stream until EOF.

		Raises AssertionError for an unknown single-byte operator and
		IndexError for an unknown escaped (12 xx) operator.  These are the
		exception types the previous assert / list-index based code produced;
		they are now raised explicitly so the checks survive `python -O`
		instead of letting stray operands leak into the next entry.
		"""
		self.entries = []
		operands = [] # TODO limit: 48
		# Index the key table once so each operator is a dict lookup instead
		# of a scan over DICT_keys.
		single_byte_keys = {}
		escaped_keys = {}
		for key_name, key_value, parameters in self.__class__.DICT_keys:
			if isinstance(key_value, list):
				# first definition wins, like the former linear search
				escaped_keys.setdefault(tuple(key_value), key_name)
			else:
				single_byte_keys[key_value] = key_name
		while True:
			data = stream.read(1)
			if not data: # EOF
				break
			b0 = ord(data)
			if not operator_P(b0):
				# NOTE(review): the original FIXME here says DICT operands
				# should be decoded with table.read_operand rather than the
				# Type 2 charstring decoder -- confirm against the table module.
				operands.append(table.read_type_2_operand(stream, b0))
				continue
			if b0 == 12 and escaped_keys:
				b1 = ord(stream.read(1))
				if (b0, b1) not in escaped_keys:
					raise IndexError("unknown escaped DICT operator 12 %d" % b1)
				key_name = escaped_keys[(b0, b1)]
			else:
				if b0 not in single_byte_keys:
					raise AssertionError("unknown DICT operator %d" % b0)
				key_name = single_byte_keys[b0]
			#print "YEP", key_name, operands
			self.entries.append((key_name, operands))
			operands = []


class Top_DICT(DICT):
	"""DICT variant used as the element type of Top_DICT_INDEX."""
	def get_first(self, key):
		"""Return the operand list of the first entry named `key`.

		Returns None if no entry has that name.
		"""
		for name, operands in self.entries:
			if name == key:
				return operands
		return None

class Top_DICT_INDEX(INDEX):
	"""INDEX whose chunks are each parsed into a Top_DICT."""
	# Picked up by INDEX.decode_index(), which parses every chunk with
	# element_type.read_from_stream().
	element_type = Top_DICT

	def read_remainder_from_stream(self, stream):
		# Pure delegation to the base class; no extra behaviour is added here.
		INDEX.read_remainder_from_stream(self, stream)


class EncodingFormat0(table.Table):
	"""Encoding body, format 0: a count followed by one code byte per glyph.

	codes[i] is the character code of glyph i + 1 (glyph 0 is not listed).
	"""
	definition = [
		("nCodes", "B"),
	]
	extra_field_names = ["codes"]
	def read_remainder_from_stream(self, stream):
		"""Read the nCodes code bytes that follow the count."""
		table.Table.read_remainder_from_stream(self, stream)
		self.codes = list(struct.unpack("%dB" % self.nCodes, stream.read(self.nCodes)))

	def get_glyph_for_code(self, code):
		"""Return the glyph index for character `code`, or None if unencoded.

		Returning None (rather than letting list.index raise ValueError)
		matches EncodingFormat1.get_glyph_for_code.
		"""
		#print >>sys.stderr, "CODE", code, "min code", min(self.codes), "max code", max(self.codes), "codes", self.codes
		try:
			return self.codes.index(code) + 1
		except ValueError:
			return None

class CRange1(table.Table):
	# Range record read by CharsetFormat1: `first` is the first SID of a run
	# and `nLeft` the number of further consecutive SIDs after it
	# (CharsetFormat1.items() walks first .. first + nLeft).
	definition = [
		("first", ">H"),
		("nLeft", "B"), # excluding first
	]

class Range1(table.Table):
	# Range record read by EncodingFormat1: `first` is the first character
	# code of a run and `nLeft` the number of further consecutive codes.
	definition = [
		("first", "B"),
		("nLeft", "B"), # excluding first
	]

class CRange2(table.Table):
	# Range record referenced by CharsetFormat2.  Same fields as CRange1,
	# but `nLeft` is declared as a 16-bit value instead of a single byte.
	definition = [
		("first", ">H"),
		("nLeft", ">H"), # excluding first
	]

class Range2(table.Table):
	# Like Range1 but with a 16-bit `nLeft`.
	# NOTE(review): nothing in this file references this class -- confirm
	# whether it is used elsewhere before relying on it.
	definition = [
		("first", "B"),
		("nLeft", ">H"), # excluding first
	]

class EncodingFormat1(table.Table):
	"""Encoding body, format 1: a count followed by that many Range1 records.

	The ranges assign glyph indices consecutively, starting at 1, to the
	character codes they cover.
	"""
	definition = [
		("nRanges", "B"),
	]
	extra_field_names = ["ranges"]

	def read_remainder_from_stream(self, stream):
		"""Read the nRanges Range1 records that follow the count."""
		table.Table.read_remainder_from_stream(self, stream)
		collected = []
		self.ranges = collected
		for _ in range(self.nRanges):
			collected.append(Range1.read_from_stream(stream))

	def get_glyph_for_code(self, code):
		"""Return the glyph index for character `code`, or None if unencoded."""
		#print >>sys.stderr, "code:", code
		#print >>sys.stderr, "ranges:", self.ranges
		glyph_base = 1 # glyph index of the first code of the current range
		for entry in self.ranges:
			offset = code - entry.first
			if 0 <= offset <= entry.nLeft:
				return glyph_base + offset
			glyph_base += entry.nLeft + 1
		return None

class EncodingSupplement(table.Table):
	# One supplemental encoding record, read by EncodingSupplements:
	# a one-byte character code and a 16-bit glyph value.
	definition = [
		("code", "B"),
		("glyph", ">H"), # FIXME SID.
	]

class EncodingSupplements(table.Table):
	"""Optional trailer of an Encoding: a count plus EncodingSupplement records.

	Encoding creates an instance directly (without reading) when the
	encoding has no supplements, so a freshly constructed object must
	already look like an empty, fully read one.
	"""
	definition = [
		("nSupplements", "B"),
	]
	def __init__(self, *args, **kwargs):
		table.Table.__init__(self, *args, **kwargs)
		self.nSupplements = 0
		# Without this, `.supplements` only exists after a read and raises
		# AttributeError on the directly constructed "no supplements" object.
		self.supplements = []

	def read_remainder_from_stream(self, stream):
		"""Read the nSupplements records that follow the count."""
		table.Table.read_remainder_from_stream(self, stream)
		self.supplements = []
		for i in range(self.nSupplements):
			self.supplements.append(EncodingSupplement.read_from_stream(stream))

class Encoding(table.Table):
	"""Custom encoding table: a format byte, a body and optional supplements.

	The low 7 bits of the format byte select the body layout (0 or 1); the
	high bit says that an EncodingSupplements block follows the body.
	Instances expect an `nGlyphs` attribute (parse() passes it to
	read_from_stream as a keyword argument).
	"""
	definition = [
		("format", "B"),
	]
	extra_field_names = ["body", "supplements"]
	def read_remainder_from_stream(self, stream):
		"""Read the body selected by self.format and any supplements.

		Raises AssertionError for a body format other than 0 or 1.  The
		exception type is the one the former assert produced, but it is now
		raised explicitly so the check is not stripped by `python -O`.
		"""
		table.Table.read_remainder_from_stream(self, stream)
		body_format = self.format & 0x7F
		if body_format == 0:
			self.body = EncodingFormat0.read_from_stream(stream, nGlyphs = self.nGlyphs)
		elif body_format == 1:
			self.body = EncodingFormat1.read_from_stream(stream, nGlyphs = self.nGlyphs)
		else:
			raise AssertionError("unsupported Encoding format %d" % body_format)
		if self.format & 0x80:
			self.supplements = EncodingSupplements.read_from_stream(stream)
		else:
			self.supplements = EncodingSupplements()

	def get_glyph_for_code(self, code):
		"""Return the glyph index for character `code`, or None.

		A body result of 0 is reported as None as well.  The supplements are
		not consulted.
		"""
		result = self.body.get_glyph_for_code(code)
		if result == 0:
			return None
		return result

class CharsetFormat0(table.Table):
	"""Charset body, format 0: one 16-bit SID per glyph, glyph 0 excluded.

	Instances expect an `nGlyphs` attribute (Charset passes it to
	read_from_stream as a keyword argument).
	"""
	#    SID glyph[nGlyphs-1]  names, without ".notdef"
	definition = [
	]
	extra_field_names = ["entries"]
	def read_remainder_from_stream(self, stream):
		"""Read up to nGlyphs - 1 SIDs; stop early at end of stream."""
		table.Table.read_remainder_from_stream(self, stream)
		self.entries = []
		for i in range(self.nGlyphs - 1):
			raw = stream.read(2)
			if len(raw) < 2: # EOF (or a truncated final SID)
				break
			# SIDs are unsigned: decoding them as signed (">h") would turn
			# every SID >= 32768 into a negative number.
			SID = struct.unpack(">H", raw)[0]
			self.entries.append(SID)

	def items(self):
		"""Return (glyph ID, SID) pairs, with glyph 0 mapped to SID 0."""
		return enumerate([0] + self.entries)

class CharsetFormat1(table.Table):
	"""Charset body, format 1: CRange1 records covering nGlyphs - 1 glyphs.

	There is no stored range count; records are read until all glyphs
	except glyph 0 are covered.  Instances expect an `nGlyphs` attribute
	(Charset passes it to read_from_stream as a keyword argument).
	"""
	definition = [
		# NO "nRanges", "B"
	]
	extra_field_names = ["ranges"]

	def read_remainder_from_stream(self, stream):
		"""Read range records until every glyph but glyph 0 is covered."""
		table.Table.read_remainder_from_stream(self, stream)
		glyphs_to_cover = self.nGlyphs - 1
		self.ranges = []
		while glyphs_to_cover > 0:
			record = CRange1.read_from_stream(stream)
			glyphs_to_cover -= record.nLeft + 1
			self.ranges.append(record)

	def items(self):
		"""Yield (glyph ID, SID) pairs, starting with (0, 0) for glyph 0."""
		yield (0, 0)
		next_glyph_ID = 1
		for record in self.ranges:
			SID = record.first
			last_SID = record.first + record.nLeft
			while SID <= last_SID:
				yield (next_glyph_ID, SID)
				next_glyph_ID += 1
				SID += 1

class CharsetFormat2(table.Table):
	"""Charset body, format 2: CRange2 records covering nGlyphs - 1 glyphs.

	Identical to format 1 except that each range stores a 16-bit nLeft
	(CRange2 instead of CRange1).  As in format 1 there is no stored range
	count; records are read until all glyphs except glyph 0 are covered.
	Instances expect an `nGlyphs` attribute (Charset passes it to
	read_from_stream as a keyword argument).
	"""
	definition = [
		# NO "nRanges" field: the number of ranges is implied by nGlyphs.
	]
	extra_field_names = ["ranges"]
	def read_remainder_from_stream(self, stream):
		"""Read range records until every glyph but glyph 0 is covered."""
		table.Table.read_remainder_from_stream(self, stream)
		uncovered = self.nGlyphs - 1
		self.ranges = []
		while uncovered > 0:
			range_2 = CRange2.read_from_stream(stream)
			uncovered -= range_2.nLeft + 1
			self.ranges.append(range_2)

	def items(self):
		"""Yield (glyph ID, SID) pairs, starting with (0, 0) for glyph 0."""
		yield (0, 0)
		glyph_ID = 0
		for range_2 in self.ranges:
			for SID in range(range_2.first, range_2.first + range_2.nLeft + 1):
				glyph_ID += 1
				yield (glyph_ID, SID)

class Charset(table.Table):
	"""Charset table: a format byte followed by a format-specific body.

	Instances expect `nGlyphs` and `string_index` attributes (parse()
	passes both to read_from_stream as keyword arguments).
	"""
	definition = [
		("format", "B"),
	]
	extra_field_names = ["body"]
	def read_remainder_from_stream(self, stream):
		"""Read the body selected by the format byte.

		Raises AssertionError for a format other than 0, 1 or 2.  The
		exception type is the one the former assert produced, but it is now
		raised explicitly so the check is not stripped by `python -O`.
		"""
		table.Table.read_remainder_from_stream(self, stream)
		#stream.read(1) # alignment
		body_format = self.format & 0x7F
		if body_format == 0:
			self.body = CharsetFormat0.read_from_stream(stream, nGlyphs = self.nGlyphs)
		elif body_format == 1:
			self.body = CharsetFormat1.read_from_stream(stream, nGlyphs = self.nGlyphs)
		elif body_format == 2:
			self.body = CharsetFormat2.read_from_stream(stream, nGlyphs = self.nGlyphs)
		else:
			raise AssertionError("unsupported Charset format %d" % body_format)
	def items(self):
		"""Return a list of (glyph ID, glyph name) pairs.

		The body supplies (glyph ID, SID) pairs; each SID is resolved to its
		string with get_string() and this table's string_index.
		"""
		return [(k, get_string(self.string_index, v)) for k, v in self.body.items()]

# The predefined ("standard") strings, indexed by SID.  SIDs 0..390 refer to
# this table; larger SIDs refer to the font's own String INDEX (see
# get_string()).  The table must therefore contain exactly nStdStrings names.
standard_strings = [
	".notdef", # 0
	"space",
	"exclam",
	"quotedbl",
	"numbersign",
	"dollar",
	"percent",
	"ampersand",
	"quoteright",
	"parenleft",
	"parenright",
	"asterisk",
	"plus",
	"comma",
	"hyphen",
	"period",
	"slash",
	"zero", # 17
	"one",
	"two",
	"three",
	"four",
	"five",
	"six",
	"seven",
	"eight",
	"nine",
	"colon",
	"semicolon",
	"less",
	"equal",
	"greater",
	"question",
	"at",
	"A", # 34
	"B",
	"C",
	"D",
	"E",
	"F",
	"G",
	"H",
	"I",
	"J",
	"K",
	"L",
	"M",
	"N",
	"O",
	"P",
	"Q",
	"R",
	"S",
	"T",
	"U",
	"V",
	"W",
	"X",
	"Y",
	"Z",
	"bracketleft", # 60
	"backslash",
	"bracketright",
	"asciicircum",
	"underscore",
	"quoteleft",
	"a", # 66
	"b",
	"c",
	"d",
	"e",
	"f",
	"g",
	"h",
	"i",
	"j",
	"k",
	"l",
	"m",
	"n",
	"o",
	"p",
	"q",
	"r",
	"s",
	"t",
	"u",
	"v",
	"w",
	"x",
	"y",
	"z",
	"braceleft", # 92
	"bar",
	"braceright",
	"asciitilde",
	"exclamdown",
	"cent",
	"sterling",
	"fraction",
	"yen", # 100
	"florin",
	"section",
	"currency",
	"quotesingle",
	"quotedblleft",
	"guillemotleft",
	"guilsinglleft",
	"guilsinglright",
	"fi",
	"fl", # 110
	"endash",
	"dagger",
	"daggerdbl",
	"periodcentered",
	"paragraph",
	"bullet",
	"quotesinglbase",
	"quotedblbase",
	"quotedblright",
	"guillemotright", # 120
	"ellipsis",
	"perthousand",
	"questiondown",
	"grave",
	"acute",
	"circumflex",
	"tilde",
	"macron",
	"breve",
	"dotaccent", # 130
	"dieresis",
	"ring",
	"cedilla",
	"hungarumlaut",
	"ogonek",
	"caron",
	"emdash",
	"AE",
	"ordfeminine",
	"Lslash", # 140
	"Oslash",
	"OE",
	"ordmasculine",
	"ae",
	"dotlessi",
	"lslash", # 146 (lower-case L with slash)
	"oslash",
	"oe",
	"germandbls",
	"onesuperior", # 150
	"logicalnot",
	"mu",
	"trademark",
	"Eth",
	"onehalf",
	"plusminus",
	"Thorn",
	"onequarter",
	"divide",
	"brokenbar", # 160
	"degree",
	"thorn",
	"threequarters",
	"twosuperior",
	"registered",
	"minus",
	"eth",
	"multiply",
	"threesuperior",
	"copyright", # 170
	"Aacute",
	"Acircumflex",
	"Adieresis",
	"Agrave",
	"Aring",
	"Atilde",
	"Ccedilla",
	"Eacute",
	"Ecircumflex",
	"Edieresis", # 180
	"Egrave",
	"Iacute", # capital I
	"Icircumflex", # capital I
	"Idieresis", # capital I
	"Igrave", # capital I
	"Ntilde",
	"Oacute",
	"Ocircumflex",
	"Odieresis",
	"Ograve", # 190
	"Otilde",
	"Scaron",
	"Uacute",
	"Ucircumflex",
	"Udieresis",
	"Ugrave",
	"Yacute",
	"Ydieresis",
	"Zcaron",
	"aacute", # 200
	"acircumflex",
	"adieresis",
	"agrave",
	"aring",
	"atilde",
	"ccedilla",
	"eacute",
	"ecircumflex",
	"edieresis",
	"egrave", # 210
	"iacute",
	"icircumflex",
	"idieresis",
	"igrave",
	"ntilde",
	"oacute",
	"ocircumflex",
	"odieresis",
	"ograve",
	"otilde", # 220
	"scaron",
	"uacute",
	"ucircumflex",
	"udieresis",
	"ugrave",
	"yacute",
	"ydieresis",
	"zcaron",
	"exclamsmall",
	"Hungarumlautsmall", # 230
	"dollaroldstyle",
	"dollarsuperior",
	"ampersandsmall",
	"Acutesmall",
	"parenleftsuperior",
	"parenrightsuperior",
	"twodotenleader", # 237
	"onedotenleader",
	"zerooldstyle",
	"oneoldstyle", # 240
	"twooldstyle",
	"threeoldstyle",
	"fouroldstyle",
	"fiveoldstyle",
	"sixoldstyle",
	"sevenoldstyle",
	"eightoldstyle",
	"nineoldstyle",
	"commasuperior",
	"threequartersemdash", # 250
	"periodsuperior",
	"questionsmall",
	"asuperior",
	"bsuperior",
	"centsuperior",
	"dsuperior",
	"esuperior",
	"isuperior",
	"lsuperior", # lower-case L
	"msuperior", # 260
	"nsuperior",
	"osuperior",
	"rsuperior",
	"ssuperior",
	"tsuperior", # 265
	"ff",
	"ffi",
	"ffl",
	"parenleftinferior",
	"parenrightinferior", # 270
	"Circumflexsmall",
	"hyphensuperior",
	"Gravesmall",
	"Asmall", # 274
	"Bsmall",
	"Csmall",
	"Dsmall",
	"Esmall",
	"Fsmall",
	"Gsmall", # 280
	"Hsmall",
	"Ismall",
	"Jsmall",
	"Ksmall",
	"Lsmall",
	"Msmall",
	"Nsmall",
	"Osmall",
	"Psmall",
	"Qsmall", # 290
	"Rsmall",
	"Ssmall",
	"Tsmall",
	"Usmall",
	"Vsmall",
	"Wsmall",
	"Xsmall",
	"Ysmall",
	"Zsmall",
	"colonmonetary", # 300
	"onefitted",
	"rupiah",
	"Tildesmall",
	"exclamdownsmall",
	"centoldstyle",
	"Lslashsmall",
	"Scaronsmall",
	"Zcaronsmall",
	"Dieresissmall",
	"Brevesmall", # 310
	"Caronsmall",
	"Dotaccentsmall",
	"Macronsmall",
	"figuredash",
	"hypheninferior",
	"Ogoneksmall",
	"Ringsmall",
	"Cedillasmall",
	"questiondownsmall",
	"oneeighth", # 320
	"threeeighths",
	"fiveeighths",
	"seveneighths",
	"onethird",
	"twothirds",
	"zerosuperior",
	"foursuperior",
	"fivesuperior",
	"sixsuperior",
	"sevensuperior", # 330
	"eightsuperior",
	"ninesuperior",
	"zeroinferior",
	"oneinferior",
	"twoinferior",
	"threeinferior",
	"fourinferior",
	"fiveinferior",
	"sixinferior",
	"seveninferior", # 340
	"eightinferior",
	"nineinferior",
	"centinferior",
	"dollarinferior",
	"periodinferior",
	"commainferior",
	"Agravesmall",
	"Aacutesmall",
	"Acircumflexsmall",
	"Atildesmall", # 350
	"Adieresissmall",
	"Aringsmall",
	"AEsmall",
	"Ccedillasmall",
	"Egravesmall",
	"Eacutesmall",
	"Ecircumflexsmall",
	"Edieresissmall",
	"Igravesmall",
	"Iacutesmall", # 360
	"Icircumflexsmall",
	"Idieresissmall",
	"Ethsmall",
	"Ntildesmall",
	"Ogravesmall",
	"Oacutesmall",
	"Ocircumflexsmall",
	"Otildesmall",
	"Odieresissmall",
	"OEsmall", # 370
	"Oslashsmall",
	"Ugravesmall",
	"Uacutesmall",
	"Ucircumflexsmall",
	"Udieresissmall",
	"Yacutesmall",
	"Thornsmall",
	"Ydieresissmall",
	"001.000",
	"001.001", # 380
	"001.002",
	"001.003",
	"Black",
	"Bold",
	"Book",
	"Light",
	"Medium",
	"Regular",
	"Roman",
	"Semibold", # 390
]

# Number of predefined strings; the first SID that refers to the font's own
# String INDEX.  Must equal len(standard_strings).
nStdStrings = 391

class String_INDEX(INDEX):
	"""INDEX of the font's own strings, read by parse() after the Top DICT INDEX.

	No element_type is defined, so the entries stay raw chunks; get_string()
	looks up SIDs at or above nStdStrings in these entries.
	"""
	pass

class CharStrings_INDEX(INDEX):
	"""INDEX whose chunks are each parsed into a charstrings.CharString."""
	# Picked up by INDEX.decode_index(), which parses every chunk with
	# element_type.read_from_stream().
	element_type = charstrings.CharString
	def read_remainder_from_stream(self, stream):
		# Pure delegation to the base class; no extra behaviour is added here.
		INDEX.read_remainder_from_stream(self, stream)

def parse(f):
	"""Parse the CFF font data in the seekable binary stream `f`.

	Only the first font of the set is examined; a warning is written to
	stderr if the set contains more than one.

	Returns a tuple (encoding, char_strings_index, charset_decoder):
	  encoding            an Encoding instance, or None if the font uses the
	                      default encoding (Encoding operator absent or 0).
	  char_strings_index  the CharStrings_INDEX of the font.
	  charset_decoder     dict mapping glyph name -> glyph ID.

	Raises AssertionError for a CharstringType other than 2 or for the
	predefined Expert encoding, and NotImplementedError for the predefined
	Expert / ExpertSubset charsets.
	"""
	header = Header.read_from_stream(f)
	f.read(header.header_size - 4) # skip junk.
	# usually version in header = 1.0
	# The Name INDEX is not used below but must be read to advance the stream.
	name_index = Name_INDEX.read_from_stream(f)
	top_DICT_index = Top_DICT_INDEX.read_from_stream(f)
	#print(top_DICT_index)
	string_index = String_INDEX.read_from_stream(f)
	top_DICT = top_DICT_index.entries[0]

	CharStringType = (top_DICT.get_first("CharStringType") or [2])[0]
	if CharStringType != 2: # CFF
		# Explicit raise (same type as the former assert) so that the check
		# is not stripped by `python -O`.
		raise AssertionError("unsupported CharstringType %r" % (CharStringType,))

	CharStrings_offset = top_DICT.get_first("CharStrings")[0]
	f.seek(CharStrings_offset)
	char_strings_index = CharStrings_INDEX.read_from_stream(f)
	#print >>sys.stderr, "glyph count", char_strings_index.count

	# FIXME handle the other fonts in the set.
	if len(top_DICT_index.entries) > 1:
		sys.stderr.write("warning: there are multiple fonts in the set.\n")

	# Encoding offsets 0 and 1 are not offsets but IDs of predefined
	# encodings (0 = Standard, which is also the default when absent).
	encoding_offsets = top_DICT.get_first("Encoding")
	encoding_offset = encoding_offsets[0] if encoding_offsets else 0
	if encoding_offset == 0:
		encoding = None
	elif encoding_offset > 1:
		f.seek(encoding_offset)
		encoding = Encoding.read_from_stream(f, nGlyphs = char_strings_index.count)
	else:
		raise AssertionError("predefined encoding %r is not supported" % (encoding_offset,))

	# Charset offsets 0, 1 and 2 are IDs of predefined charsets
	# (0 = ISOAdobe, which is also the default when the operator is absent).
	Charset_operands = top_DICT.get_first("Charset")
	Charset_offset = Charset_operands[0] if Charset_operands else 0
	if Charset_offset > 2:
		f.seek(Charset_offset)
		charset = Charset.read_from_stream(f, nGlyphs = char_strings_index.count, string_index = string_index)
		charset_items = charset.items()
	elif Charset_offset == 0:
		# ISOAdobe: glyph ID i is named by SID i, for SIDs 0..228.
		charset_items = [(glyph_ID, get_string(string_index, glyph_ID)) for glyph_ID in range(min(char_strings_index.count, 229))]
	else:
		raise NotImplementedError("predefined charset %r (Expert/ExpertSubset) is not supported" % (Charset_offset,))
	charset_decoder = dict([(v, k) for k, v in charset_items])
	return encoding, char_strings_index, charset_decoder

def get_string(string_index, SID):
	"""Resolve a string ID to its string.

	SIDs below nStdStrings index the predefined standard_strings table;
	all others index string_index.entries, counted from nStdStrings.
	"""
	custom_position = SID - nStdStrings
	if custom_position < 0:
		return standard_strings[SID]
	return string_index.entries[custom_position]


#print header
#print name_index
#print top_DICT_index
#print string_index
#print "VER", get_string(391)
#print "NOTICE", get_string(392)
#print "FULLNAME", get_string(393)
#print "FAMILYNAME", get_string(394)
#print "WEIGHT", get_string(395)

#print get_string(174)
#print get_string(17)

"""
Global Subr INDEX
--
FDSelect
CharStrings INDEX per-font
Font DICT INDEX per-font 
Private DICT per-font 
Local Subr INDEX per-font
Copyright etc
---


"""
#print char_strings_index


if __name__ == "__main__":
	# Command-line use: parse the font file named by the first argument and
	# print the resulting (encoding, char_strings_index, charset_decoder).
	f = open(sys.argv[1], "rb")
	try:
		result = parse(f)
	finally:
		# Close the input file even if parsing fails.
		f.close()
	sys.stdout.write("%s\n" % (result,))