# solr4tcl --
#
package require Tcl 8.6
package require TclOO
package require http
package require tdom
package provide solr4tcl 0.1
#
# Solr_Request class: sends HTTP requests to an Apache Solr server
#
oo::class create Solr_Request {
    # server      - base URL of the Solr server, as given to the constructor
    # path        - optional path segment inserted after /solr ("" = none)
    # solr_writer - value sent as the "wt" parameter by the search method
    variable server
    variable path
    variable solr_writer

    #
    # SERVER - base URL of the server, e.g. http://localhost:8983
    #
    constructor {SERVER} {
        set server $SERVER
        set path ""
        set solr_writer "xml"
    }

    destructor {
    }

    #
    # Set the path segment that is placed between /solr and the handler
    # name when request URLs are built. An empty string disables it.
    #
    method setDocumentPath {PATH} {
        set path $PATH
    }

    #
    # Set the response writer used by search (sent as the "wt" parameter).
    # Supported types: xml, json and csv
    #
    method setSolrWriter {WRITER} {
        set solr_writer $WRITER
    }

    #
    # Private helper: escape the characters that are special in XML text
    # and attribute values, so caller-supplied values cannot break the
    # update messages that are assembled as strings.
    #
    method XmlEscape {text} {
        return [string map \
            [list & "&amp;" < "&lt;" > "&gt;" \" "&quot;" ' "&apos;"] $text]
    }

    #
    # Private helper: build "<server>/solr[/<path>]/<handler>[?<query>]".
    # params is a flat name-value list; when it is empty no query string
    # is appended.
    #
    method BuildUrl {handler {params ""}} {
        set url "$server/solr"
        if {$path ne ""} {
            append url "/$path"
        }
        append url "/$handler"
        if {[llength $params] > 0} {
            append url "?[http::formatQuery {*}$params]"
        }
        return $url
    }

    #
    # Perform one synchronous HTTP request.
    #
    # url     - complete request URL
    # method  - HTTP method name (compared case-insensitively with HEAD)
    # headers - name-value list passed to http::geturl -headers
    # data    - request body; sent with -query only when non-empty
    #
    # Returns "error" when http::geturl raises an error. For HEAD requests
    # the http::status value is returned; for every other method the
    # response body (http::data) is returned.
    #
    method send_request {url method {headers ""} {data ""}} {
        set options [list -method $method -headers $headers]
        if {[string length $data] > 0} {
            lappend options -query $data
        }
        if {[catch {http::geturl $url {*}$options} token]} {
            return "error"
        }
        try {
            # string compare yields -1/0/1, so testing "== 1" treated
            # methods sorting before HEAD (GET, DELETE) like HEAD and
            # returned the status instead of the body.
            if {[string equal -nocase $method "HEAD"]} {
                return [http::status $token]
            }
            return [http::data $token]
        } finally {
            # Always release the token, whichever branch returned.
            http::cleanup $token
        }
    }

    #
    # Call the /admin/ping servlet with a HEAD request.
    #
    # Returns the http::status value of the transaction, or "error" when
    # the request could not be made. NOTE(review): http::status reports
    # whether the HTTP transaction completed, not the HTTP response code.
    #
    method ping {} {
        return [my send_request [my BuildUrl admin/ping] HEAD ""]
    }

    #
    # Simple search interface.
    #
    # query  - value of the "q" parameter
    # offset - value of the "start" parameter
    # limit  - value of the "rows" parameter
    # params - extra name-value pairs placed in front of the parameters
    #          added here
    #
    # The format of the returned data is selected by "wt", i.e. by the
    # setSolrWriter setting.
    #
    method search {query {offset 0} {limit 10} {params ""}} {
        lappend params q $query wt $solr_writer start $offset rows $limit
        set body [http::formatQuery {*}$params]
        set headers [list Content-Type \
            "application/x-www-form-urlencoded; charset=UTF-8"]
        return [my send_request [my BuildUrl select] POST $headers $body]
    }

    #
    # Add one document.
    #
    # parameters - flat list of field-name / field-value pairs
    # OVERWRITE  - value of the overwrite attribute of the add element
    # BOOST      - value of the boost attribute of the doc element
    # COMMIT     - value of the commit URL parameter
    #
    method add {parameters {OVERWRITE true} {BOOST "1.0"} {COMMIT true}} {
        set doc [dom createDocument add]
        try {
            set root [$doc documentElement]
            $root setAttribute overwrite $OVERWRITE
            set docnode [$doc createElement doc]
            $docnode setAttribute boost $BOOST
            $root appendChild $docnode
            foreach {key value} $parameters {
                set node [$doc createElement field]
                $node setAttribute name $key
                $node appendChild [$doc createTextNode $value]
                $docnode appendChild $node
            }
            set xml [$root asXML]
        } finally {
            # The DOM tree is only needed for serialization; free it so
            # repeated calls do not accumulate documents.
            $doc delete
        }
        set url [my BuildUrl update [list commit $COMMIT]]
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request $url POST $headers $xml]
    }

    #
    # Post a ready-made XML update message.
    #
    # xmldata - XML string to send as the request body, sent unchanged
    # COMMIT  - value of the commit URL parameter
    #
    method addData {xmldata {COMMIT true}} {
        set url [my BuildUrl update [list commit $COMMIT]]
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request $url POST $headers $xmldata]
    }

    #
    # The <commit> operation writes all documents loaded since the last
    # commit to one or more segment files on the disk.
    #
    method commit {{WAITSEARCHER true} {EXPUNGEDELETES false}} {
        set xml "<commit waitSearcher=\"[my XmlEscape $WAITSEARCHER]\"\
expungeDeletes=\"[my XmlEscape $EXPUNGEDELETES]\"/>"
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request [my BuildUrl update] POST $headers $xml]
    }

    #
    # The <optimize> operation requests Solr to merge internal data
    # structures in order to improve search performance.
    #
    method optimize {{WAITSEARCHER true} {MAXSegments 1}} {
        set xml "<optimize waitSearcher=\"[my XmlEscape $WAITSEARCHER]\"\
maxSegments=\"[my XmlEscape $MAXSegments]\"/>"
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request [my BuildUrl update] POST $headers $xml]
    }

    #
    # "Delete by ID" deletes the document with the specified ID.
    # The ID is XML-escaped before it is placed in the message.
    #
    method deleteById {ID {COMMIT true}} {
        set xml "<delete><id>[my XmlEscape $ID]</id></delete>"
        set url [my BuildUrl update [list commit $COMMIT]]
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request $url POST $headers $xml]
    }

    #
    # "Delete by Query" deletes all documents matching a specified query.
    # The query is XML-escaped before it is placed in the message.
    #
    method deleteByQuery {QUERY {COMMIT true}} {
        set xml "<delete><query>[my XmlEscape $QUERY]</query></delete>"
        set url [my BuildUrl update [list commit $COMMIT]]
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request $url POST $headers $xml]
    }

    #
    # Upload data to the /update/extract handler (Apache Tika).
    #
    # fileContent - data sent as the request body
    # FILENAME    - when non-empty, sent as the resource.name parameter
    # COMMIT      - value of the commit URL parameter
    # ExtractOnly - value of the extractOnly URL parameter
    # params      - extra name-value pairs placed in front of the
    #               parameters added here
    #
    method upload {fileContent {FILENAME ""} {COMMIT true} {ExtractOnly false} {params ""}} {
        lappend params commit $COMMIT extractOnly $ExtractOnly
        # Any non-empty name counts; the old "> 1" test dropped
        # single-character file names.
        if {$FILENAME ne ""} {
            lappend params "resource.name" $FILENAME
        }
        set url [my BuildUrl update/extract $params]
        # NOTE(review): the body is labelled text/xml whatever the file
        # type is - confirm this is what the extract handler expects.
        set headers [list Content-Type "text/xml; charset=UTF-8"]
        return [my send_request $url POST $headers $fileContent]
    }
}
Test code:
#!/usr/bin/tclsh
# Example session: exercises ping, search, add, commit, optimize and the
# two delete operations against a local Solr server.
package require solr4tcl

# Create a request object for the local server and select the
# "gettingstarted" path.
set client [Solr_Request new "http://localhost:8983"]
$client setDocumentPath gettingstarted

# Response writer for search: xml, json or csv
$client setSolrWriter xml

# Stop right away unless the ping result is "ok" (case-insensitive).
set res [$client ping]
if {![string equal -nocase $res "ok"]} {
    puts "Apache Solr server returns not OK, close."
    exit
}

set res [$client search "foundation"]
puts "Search result:"
puts $res

# Field name / value pairs of the document to add.
set parameters [list \
    authors "Patrick Eagar" \
    subject "Sports" \
    dd "796.35" \
    isdn "0002166313" \
    yearpub "1982" \
    publisher "Collins"]
set res [$client add $parameters true]
puts $res

set res [$client commit]
puts $res

set res [$client optimize]
puts $res

set res [$client deleteById "0002166313"]
puts $res

set res [$client deleteByQuery "publisher:Collins"]
puts $res

# Disabled example: read a PDF in binary mode and upload it.
#set size [file size "./solr-word.pdf"]
#set fd [open "./solr-word.pdf" {RDWR BINARY}]
#fconfigure $fd -blocking 1 -encoding binary -translation binary
#set data [read $fd $size]
#close $fd
#set res [$client upload $data "solr-word.pdf"]
#puts $res

set res [$client search "pdf"]
puts "Search result:"
puts $res
Apache Solr 不像 Apache CouchDB 那樣提供一份整理好的 REST API 列表,所以我只針對 search 的部份做了一個簡單的 search method,並確認在已依照 Solr Quick Start 啟動 Solr 的情況下,能得到正確的回應(而不是無法連線,或者是傳回一個 404「網頁不存在」的錯誤網頁)。
更新 2015/12/29:
加入上傳檔案和 index 處理的部份
更新 2016/01/02:
更新 method ping 的參數
沒有留言:
張貼留言