#!/usr/bin/python
# -*- coding: utf-8 -*-

# ****************************************
# +++  BioCASE 
# +++  Export an entire datasource into XML
#
# export an entire database 
# into ABCD or darwin core
#
# ****************************************

import sys, os, optparse
execfile( os.path.abspath(os.path.join(os.path.dirname( __file__ ), os.path.pardir, 'lib', 'biocase', 'adjustpath.py' ) ) )

import biocase.datasources
import biocase.querytool.querydispatcher
import biocase.querytool.querygenerator


############################################################################################################
#
# MAIN
#
#===========================================================================================================

def write_page_nodes(content, out, verbose=False):
	"""Serialize the record nodes of one response page into ``out``.

	Starting at the first element node found while iterating
	``content.children``, that node and every following sibling reachable
	through ``.next`` are serialized and written. The page's own root
	element is thereby omitted, so several pages can be concatenated
	under a single root tag.

	content -- parsed page root; must offer ``children`` whose nodes have
	           ``type``, ``name``, ``next`` and ``serialize()``
	out     -- any object with a ``write()`` method
	verbose -- when true, report the first element found on stdout

	Returns the number of nodes written, 0 when the page holds no element
	node (nothing is written in that case).
	"""
	# "or ()" tolerates a root without any children (None or empty).
	for node in content.children or ():
		if node.type == 'element':
			break
	else:
		# No element on this page. Writing anyway would emit a stray
		# non-element node or a node left over from the previous page.
		return 0
	if verbose:
		print("Found first sibling: %s" % node.name)
	# Write the first element, then walk the sibling chain.
	out.write(node.serialize())
	written = 1
	while node.next:
		node = node.next
		out.write(node.serialize())
		written += 1
	return written


if __name__ == "__main__":

	# Single-argument print(...) is used throughout: under the Python 2
	# print statement it prints exactly the same text, and it also parses
	# on Python 3.
	parser = optparse.OptionParser()
	parser.add_option("-d", "--dsa", dest="dsa", default=None, help="use a local datasource name as the wrapper url")
	parser.add_option("-u", "--url", dest="wrapper_url", default=None, help="use a full wrapper url incl http://")
	parser.add_option("-s", "--standard", dest="format", default='http://www.tdwg.org/schemas/abcd/1.2', help="the namespace of the standard to be used for exporting. By default this is ABCD1.20")
	parser.add_option("-f", "--file", dest="filename", default="dbexport.xml", help="write export dump to FILE", metavar="FILE")
	parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout")
	(options, args) = parser.parse_args()

	if options.verbose:
		print(''' +++  BioCASE 
 +++  Export an entire datasource into XML
''')

	# Resolve and validate the wrapper URL BEFORE touching the output file,
	# so that a bad invocation does not truncate an existing export.
	if options.dsa is not None:
		# a local datasource name takes precedence over --url
		dsaObj = biocase.datasources.Datasource(options.dsa)
		options.wrapper_url = dsaObj.getBioCASeAccessPoint()
	if options.wrapper_url is None:
		print("Please provide a local datasource name or a full wrapper URL.")
		if options.verbose:
			parser.print_help()
		sys.exit(2)
	if options.verbose:
		print("Start export of all data from datasource:\n%s\ninto standard format %s\n" % (options.wrapper_url, options.format))

	# export!
	QD = biocase.querytool.querydispatcher.QueryDispatcher()
	QG = biocase.querytool.querygenerator.QueryGenerator()
	rootTag = None
	nextRecNum = 0
	pageNum = 0
	ExportFile = open(options.filename, 'w')
	try:
		# Page through the datasource until no next page index is reported.
		while nextRecNum is not None:
			pageNum += 1
			protocolXML = QG.getSearchProtocol(start=nextRecNum, reqNS=options.format, respNS=options.format)
			if options.verbose:
				print("Exporting page %i, start index=%i" % (pageNum, nextRecNum))
			recStatus = QD.sendQuery(wrapper_url=options.wrapper_url, protocolString=protocolXML)
			nextRecNum = recStatus.getNextPageIndex()
			content = QD.getContent(asDoc=False)
			if pageNum == 1:
				# The first page supplies the XML declaration and the
				# root element that wraps the records of all pages.
				rootTag = content.name
				ExportFile.write('<?xml version="1.0"?>\n<%s>' % rootTag)
			# write all child nodes separately to omit the page's own root
			write_page_nodes(content, ExportFile, options.verbose)
		# close XML root tag
		ExportFile.write('</%s>' % rootTag)
	finally:
		# Release the file handle even when a query or serialization fails.
		ExportFile.close()
	if options.verbose:
		print("\n\nEXPORT FINISHED!")