2015-12-28

Apache Solr test code

solr4tcl-0.1.tm

# solr4tcl --
#

package require Tcl 8.6
package require TclOO
package require http
package require tdom

package provide solr4tcl 0.1


#
# Solr_Request class: handle send request to Apache Solr
#
oo::class create Solr_Request {
    #
    # Thin HTTP client for the Apache Solr request handlers.
    #
    # Instance state:
    #   server      - base URL given to the constructor, e.g. http://localhost:8983
    #   path        - optional core/collection name inserted after "/solr"
    #   solr_writer - value sent as the "wt" (response writer) search parameter
    #
    # Every request method returns whatever send_request returns: the
    # response body, the http::status value for HEAD requests, or the
    # literal string "error" when the request could not be issued.
    #
    # Methods whose names start with an upper-case letter are private
    # helpers (TclOO does not export them by default).
    #
    variable server
    variable path
    variable solr_writer

    constructor {SERVER} {
        set server $SERVER
        set path ""

        set solr_writer "xml"
    }

    destructor {
    }

    #
    # Select the core/collection that subsequent requests are sent to.
    # An empty PATH addresses the handlers directly below "/solr".
    #
    method setDocumentPath {PATH} {
        set path $PATH
    }

    #
    # Select the response writer used by search.
    # support type: xml, json and csv
    #
    method setSolrWriter {WRITER} {
        set solr_writer $WRITER
    }

    #
    # Private: build "$server/solr[/$path]/$handler[?query-string]".
    # params is a flat name-value list; it is URL-encoded with
    # http::formatQuery and omitted completely when empty.
    #
    method BuildUrl {handler {params ""}} {
        set url "$server/solr"
        if {[string length $path] > 0} {
            append url "/$path"
        }
        append url "/$handler"

        if {[llength $params] > 0} {
            append url "?[http::formatQuery {*}$params]"
        }
        return $url
    }

    #
    # Private: escape the characters that are special in XML text and
    # in double-quoted attribute values.
    #
    method XmlEscape {text} {
        return [string map [list & "&amp;" < "&lt;" > "&gt;" \" "&quot;"] $text]
    }

    #
    # Private: POST an XML message generated by this class.
    # The http package writes the -query data to the socket as raw bytes,
    # so the string is converted to UTF-8 first to match the charset
    # announced in the Content-Type header.
    #
    method PostXml {url xml} {
        set headerl [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request $url POST $headerl [encoding convertto utf-8 $xml]]
    }

    #
    # Issue one HTTP request.
    #
    #   url     - complete request URL
    #   method  - HTTP verb (compared case-insensitively against HEAD)
    #   headers - flat name-value list handed to http::geturl -headers
    #   data    - request body; -query is only passed when it is non-empty
    #
    # Returns the http::status value for HEAD requests (they carry no
    # body), the response body for every other verb, or "error" when
    # http::geturl itself raises an error.
    #
    method send_request {url method {headers ""} {data ""}} {
        set options [list -method $method -headers $headers]
        if {[string length $data] > 0} {
            lappend options -query $data
        }

        if {[catch {http::geturl $url {*}$options} tok]} {
            return "error"
        }

        # Only HEAD is answered with the status; the previous test
        # (string compare ... == 1) also matched verbs that sort before
        # "HEAD" such as GET and DELETE, hiding their response body.
        if {[string equal -nocase $method "HEAD"]} {
            set res [http::status $tok]
        } else {
            set res [http::data $tok]
        }

        http::cleanup $tok
        return $res
    }

    #
    # Call the /admin/ping servlet with a HEAD request.
    # Returns the http::status value ("ok" when the transaction
    # completed) or "error".
    #
    method ping {} {
        return [my send_request [my BuildUrl admin/ping] HEAD ""]
    }

    #
    # Simple Search interface
    #
    #   query  - value of the "q" parameter
    #   offset - value of the "start" parameter
    #   limit  - value of the "rows" parameter
    #   params - list of additional name-value pair parameters
    #
    # The return data format is defined by wt, the $solr_writer setting.
    #
    method search {query {offset 0} {limit 10} {params ""}} {
        lappend params q $query
        lappend params wt $solr_writer
        lappend params start $offset
        lappend params rows $limit
        set querystring [http::formatQuery {*}$params]

        set headerl [list Content-Type "application/x-www-form-urlencoded; charset=UTF-8"]
        return [my send_request [my BuildUrl select] POST $headerl $querystring]
    }

    #
    # Add one document.
    #
    #   parameters - a list of field-name / field-value pairs
    #   OVERWRITE  - value of the <add overwrite="..."> attribute
    #   BOOST      - value of the <doc boost="..."> attribute
    #   COMMIT     - value of the "commit" URL parameter
    #
    method add {parameters {OVERWRITE true} {BOOST "1.0"} {COMMIT true}} {
        # Build the <add><doc><field name="..">..</field>..</doc></add>
        # message with tDOM so that names and values are escaped properly.
        set doc [dom createDocument add]
        try {
            set root [$doc documentElement]
            $root setAttribute overwrite $OVERWRITE

            set docnode [$doc createElement doc]
            $docnode setAttribute boost $BOOST
            $root appendChild $docnode

            foreach {key value} $parameters {
                set node [$doc createElement field]
                $node setAttribute name $key
                $node appendChild [$doc createTextNode $value]
                $docnode appendChild $node
            }

            set myaddString [$root asXML]
        } finally {
            # tDOM documents live until they are deleted explicitly.
            $doc delete
        }

        return [my PostXml [my BuildUrl update [list commit $COMMIT]] $myaddString]
    }

    #
    # Post a caller-supplied XML update message.
    #
    #   xmldata - xml data string want to add
    #   COMMIT  - value of the "commit" URL parameter
    #
    # NOTE(review): xmldata is sent exactly as given (no encoding
    # conversion) so that callers which already pass UTF-8 bytes keep
    # working; confirm whether callers pass bytes or ordinary strings.
    #
    method addData {xmldata {COMMIT true}} {
        set headerl [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request [my BuildUrl update [list commit $COMMIT]] \
            POST $headerl $xmldata]
    }

    #
    # The <commit>  operation writes all documents loaded since the last
    # commit to one or more segment files on the disk
    #
    method commit {{WAITSEARCHER true} {EXPUNGEDELETES false}} {
        set mycommitString "<commit waitSearcher=\"[my XmlEscape $WAITSEARCHER]\"\
 expungeDeletes=\"[my XmlEscape $EXPUNGEDELETES]\"/>"

        return [my PostXml [my BuildUrl update] $mycommitString]
    }

    #
    # The <optimize> operation requests Solr to merge internal data structures
    # in order to improve search performance.
    #
    method optimize {{WAITSEARCHER true} {MAXSegments 1}} {
        set myoptimizeString "<optimize waitSearcher=\"[my XmlEscape $WAITSEARCHER]\"\
 maxSegments=\"[my XmlEscape $MAXSegments]\"/>"

        return [my PostXml [my BuildUrl update] $myoptimizeString]
    }

    #
    #  "Delete by ID" deletes the document with the specified ID
    #  (ID is plain text; it is XML-escaped here)
    #
    method deleteById {ID {COMMIT true}} {
        set mydeleteString "<delete><id>[my XmlEscape $ID]</id></delete>"

        return [my PostXml [my BuildUrl update [list commit $COMMIT]] $mydeleteString]
    }

    #
    #  "Delete by Query" deletes all documents matching a specified query
    #  (QUERY is a plain Solr query string; it is XML-escaped here)
    #
    method deleteByQuery {QUERY {COMMIT true}} {
        set mydeleteString "<delete><query>[my XmlEscape $QUERY]</query></delete>"

        return [my PostXml [my BuildUrl update [list commit $COMMIT]] $mydeleteString]
    }

    #
    #  Uploading Data by using Apache Tika
    #
    #   fileContent - content of the file, posted unchanged as the body
    #   FILENAME    - sent as "resource.name" when not empty
    #   COMMIT      - value of the "commit" URL parameter
    #   ExtractOnly - value of the "extractOnly" URL parameter
    #   params      - list of additional name-value pair parameters
    #
    method upload {fileContent {FILENAME ""} {COMMIT true} {ExtractOnly false} {params ""}} {
        lappend params commit $COMMIT extractOnly $ExtractOnly

        # Any non-empty name is forwarded (one-character names used to
        # be dropped by a "> 1" length test).
        if {[string length $FILENAME] > 0} {
            lappend params "resource.name" $FILENAME
        }

        # NOTE(review): the body is labelled text/xml even when it is a
        # binary file - confirm this is what the extract handler expects.
        set headerl [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request [my BuildUrl update/extract $params] \
            POST $headerl $fileContent]
    }
}



Test code:

#!/usr/bin/tclsh
#
# Smoke test for the solr4tcl package.
# Pings the server, then runs a search, add, commit, optimize, two
# deletes and a final search against the "gettingstarted" collection,
# printing each response.
#

package require solr4tcl

set solr [Solr_Request new "http://localhost:8983"]
$solr setDocumentPath gettingstarted

# support xml, json or csv
$solr setSolrWriter xml

# ping returns the HTTP transaction status; anything but "ok" means the
# server cannot be used, so stop with a non-zero exit status.
set res [$solr ping]
if {![string equal -nocase $res "ok"]} {
    puts stderr "Apache Solr server returns not OK, close."
    exit 1
}

set res [$solr search "foundation"]
puts "Search result:"
puts $res

set parameters [list authors "Patrick Eagar" subject "Sports" dd "796.35" \
                     isdn "0002166313" yearpub "1982" publisher "Collins"]
set res [$solr add $parameters true]
puts $res

set res [$solr commit]
puts $res

set res [$solr optimize]
puts $res

set res [$solr deleteById "0002166313"]
puts $res

set res [$solr deleteByQuery "publisher:Collins"]
puts $res

# Optional: upload a file for indexing (disabled by default).
#set size [file size "./solr-word.pdf"]
#set fd [open "./solr-word.pdf" {RDWR BINARY}]
#fconfigure $fd -blocking 1 -encoding binary -translation binary
#set data [read $fd $size]
#close $fd
#set res [$solr upload $data "solr-word.pdf"]
#puts $res

set res [$solr search "pdf"]
puts "Search result:"
puts $res



Apache Solr 沒有像 Apache CouchDB 一樣將 REST API 整理成一張表,所以我只針對 search 的部份寫了一個簡單的 search method,並確認在已依照 Solr Quick Start 啟動 Solr 的情況下,能夠收到正確的回應(而不是無法連線,或是傳回網頁不存在的 404 錯誤網頁)。

更新 2015/12/29:
加入上傳檔案和 index 處理的部份

更新 2016/01/02:
更新 method ping 的參數

沒有留言: