#!/usr/bin/env python
#-*- coding: UTF-8 -*-

class DataParser():
    """
    Parse a whitespace-separated ``file.dt`` table into a dictionary.

    Expected file layout (markers are only recognized at the very start
    of a line)::

        ##  below is the information for data
        >> unique_flag  info1  info2  info3  ...
        index0       xxx    ddd    ww     ...
        index2       sss    ccc    --     ...

    * A line starting with ``##`` is a comment and is skipped.
    * A line starting with ``>>`` is a key line and opens a new section.
      Its first whitespace-separated token (the ``>>`` marker) is dropped;
      the remaining tokens are the column keys of that section.
    * Every other non-blank line is a data row.  It must contain exactly
      as many tokens as its section has column keys; ``--`` is the
      conventional filler for a missing value and is stored verbatim.
    * Data rows that appear before the first key line are ignored.

    Each value is stored as a string in a list under the dictionary key
    ``"<row>-!-<col>"``, where ``<row>`` is the token found in the
    section's index column and ``<col>`` is the column key.  When the same
    key occurs several times the values are collected in file order.
    """

    def __init__(self, filename, colkeyindexes=None):
        """
        Read and parse ``filename``.

        @param:
          filename: path of the file.dt to parse
          colkeyindexes: optional sequence; its i-th item is the column
                         number used as row index for the i-th ``>>``
                         section.  A negative item stores that section's
                         values under the bare column key (no row part).
                         ``None`` (default) uses column 0 for every
                         section, however many sections the file has.

        Terminates with ``SystemExit(-1)`` when the file is malformed.
        """
        self._colkeyindexes = colkeyindexes
        self._datadict = self.__parser(filename)

    def getKeys(self):
        """Return all keys of the parsed dictionary."""
        return self._datadict.keys()

    def getData(self, row, col):
        """
        Return the data stored for (row, col).

        ``row`` and ``col`` are converted with ``str``.  A single stored
        value is returned as a plain string, several values as a list.
        Raises ``KeyError`` when the combination does not exist.
        """
        val = self._datadict["%s-!-%s" % (str(row), str(col))]
        if len(val) == 1:
            val = val[0]
        return val

    def getDataByKey(self, key):
        """Return the list of values stored under the raw dictionary key."""
        return self._datadict[key]

    def getDataByKeys(self, keys):
        """Return the value lists for every raw key in ``keys``, in order."""
        return [self._datadict[key] for key in keys]

    def listColKeys(self, row):
        """Return the distinct column keys that hold data for ``row``."""
        return self._matchingkeyparts(row, 0, 1)

    def listRowKeys(self, col):
        """Return the distinct row keys that hold data for ``col``."""
        return self._matchingkeyparts(col, 1, 0)

    def _matchingkeyparts(self, wanted, matchpos, takepos):
        """
        Collect one part of every "row-!-col" key whose other part matches.

        ``wanted`` is converted with ``str`` (the same conversion getData
        applies) and compared with part ``matchpos`` of each key; part
        ``takepos`` of the matching keys is returned without duplicates,
        in the iteration order of the dictionary.
        """
        wanted = str(wanted)
        found, seen = [], set()
        for key in self._datadict:
            if "-!-" not in key:
                # stored with a negative index column: no row part
                continue
            parts = key.split("-!-")
            if parts[matchpos] != wanted:
                continue
            part = parts[takepos]
            if part not in seen:
                seen.add(part)
                found.append(part)
        return found

    def _rowindexcolumn(self, section):
        """Return the index column configured for the given section number."""
        if self._colkeyindexes is None:
            return 0
        return self._colkeyindexes[section]

    #########################################
    #### parse the datafile as a dictionary
    #########################################
    def __parser(self, filename):
        """
        Parse the file.dt and return it as a dictionary.

        The result maps "row-!-col" (or the bare column key for sections
        with a negative index column) to the list of string values found
        for that key.  Prints a diagnostic and raises ``SystemExit(-1)``
        when the file has no key line or when a data row does not have the
        same number of tokens as its key line.
        """
        # one (column keys, data rows) pair per ">>" section, in file order
        sections = []
        with open(filename) as dtfile:
            for line in dtfile:
                if line.startswith(">>"):
                    sections.append((line.split()[1:], []))
                    continue
                tokens = line.split()
                if line.startswith("##") or not tokens:
                    continue
                if sections:
                    # rows in front of the first key line belong to no section
                    sections[-1][1].append(tokens)

        if not sections:
            # the file contains no key line at all
            print("dtparser WRONG: keylist and datalist lengthes are not same")
            raise SystemExit(-1)

        datadict = dict()
        for section, (keys, rows) in enumerate(sections):
            for values in rows:
                if len(values) != len(keys):
                    print("!!! dtparser WRONG: number of keys and value are different in %s !!!"%filename)
                    print(" --->>> Please use \"--\" fill blank ")
                    raise SystemExit(-1)
                index = self._rowindexcolumn(section)
                if index >= 0:
                    rowkey = values[index]
                    for colkey, value in zip(keys, values):
                        key = "%s-!-%s" % (rowkey, colkey)
                        datadict.setdefault(key, []).append(value)
                else:
                    for colkey, value in zip(keys, values):
                        datadict.setdefault(colkey, []).append(value)
        return datadict
