<cfcomponent displayName="PDF Utils" hint="Set of utilities to work with PDFs in ColdFusion 8" output="false">

<!---
	getText
	Extracts the text of a PDF by running a DDX "DocumentText" job through cfpdf,
	then reading the XML document that job writes to a temporary file.

	Arguments:
		pdfpath - path to the PDF file to read.

	Returns:
		An array with one string per Page node found under DocText/TextPerPage in
		the generated XML. The array is empty when that structure is not present.

	Throws:
		A custom error ("Unable to find pdf: ...") when pdfpath does not exist.
		Any error raised by cfpdf, cffile or xmlParse is rethrown unchanged after
		the temporary file has been removed.
--->
<cffunction name="getText" access="public" returnType="array" output="false" hint="Returns an array of strings, one per page of text extracted from the PDF.">
	<cfargument name="pdfpath" type="string" required="true" hint="The path to the PDF file">
	<cfset var results = arrayNew(1)>
	<cfset var ddx = "">
	<cfset var inputStruct = "">
	<cfset var outputStruct = "">
	<cfset var tempxml = "">
	<cfset var ddxVar = "">
	<cfset var myxml = "">
	<cfset var pages = "">
	<cfset var x = "">

	<!--- does the file exist? Checked before the temp file is created so a bad path leaves nothing behind. --->
	<cfif not fileExists(arguments.pdfpath)>
		<cfthrow message="Unable to find pdf: #arguments.pdfpath#">
	</cfif>

	<!--- Create DDX: ask for the document text of input "doc1", written to output "Out1" --->
	<cfsavecontent variable="ddx">
<?xml version="1.0" encoding="UTF-8"?>
<DDX xmlns="http://ns.adobe.com/DDX/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://ns.adobe.com/DDX/1.0/ coldfusion_ddx.xsd">
<DocumentText result="Out1">
<PDF source="doc1"/>
</DocumentText>
</DDX>
	</cfsavecontent>

	<!--- cfsavecontent captures the line break before the XML declaration; strip it so the declaration comes first --->
	<cfset ddx = trim(ddx)>

	<!--- getTempFile creates a file on disk, so from here on every exit path must delete it --->
	<cfset tempxml = getTempFile(getTempDirectory(), "pdfutils")>

	<cftry>
		<!--- Set my parameters: the keys match the source/result names used in the DDX above --->
		<cfset inputStruct = {doc1="#arguments.pdfpath#"}>
		<cfset outputStruct = {Out1="#tempxml#"}>

		<!--- Process --->
		<cfpdf action="processddx" ddxfile="#ddx#" inputfiles="#inputStruct#" outputfiles="#outputStruct#" name="ddxVar">

		<!--- read in and parse xml --->
		<cffile action="read" file="#tempxml#" variable="myxml">
		<cfset myxml = xmlParse(myxml)>

		<cfcatch type="any">
			<!--- cffinally is not available in ColdFusion 8, so clean up here and pass the original error on --->
			<cfset removeTempFile(tempxml)>
			<cfrethrow>
		</cfcatch>
	</cftry>

	<!--- The XML is in memory now; the temp file is no longer needed --->
	<cfset removeTempFile(tempxml)>

	<!--- ensure valid stuff: only walk the pages when the expected nodes are present --->
	<cfif structKeyExists(myxml, "DocText") and structKeyExists(myxml.DocText,"TextPerPage") and structKeyExists(myxml.DocText.TextPerPage, "Page")>

		<cfset pages = myxml.DocText.TextPerPage.Page>
		<cfloop index="x" from="1" to="#arrayLen(pages)#">
			<cfset arrayAppend(results, pages[x].xmltext)>
		</cfloop>

	</cfif>

	<cfreturn results>
</cffunction>

<!---
	removeTempFile
	Best-effort removal of a temporary file. A failure to delete is deliberately
	ignored so that cleanup can never replace the result or the error of the
	operation that created the file.
--->
<cffunction name="removeTempFile" access="private" returnType="void" output="false" hint="Deletes the given file if it exists, ignoring any error raised by the delete.">
	<cfargument name="path" type="string" required="true" hint="The path of the temporary file to delete">

	<cftry>
		<cfif len(arguments.path) and fileExists(arguments.path)>
			<cffile action="delete" file="#arguments.path#">
		</cfif>
		<cfcatch type="any">
			<!--- Intentionally ignored: a leftover temp file is preferable to masking the caller's outcome --->
		</cfcatch>
	</cftry>
</cffunction>

</cfcomponent>