%% wfbm3 -- search functions for Tim Bray's Wide Finder project
%% Author: Steve Vinoski (http://steve.vinoski.net/), 18 October 2007.
%% NOTE(review): the original "See <...>" reference link is missing from this
%% copy of the file; restore it from the upstream source if available.
%%
%% Scans a binary for occurrences of ?STR followed by a dated article path of
%% the form "200x/YYYY/MM/DD/Name " and sends <<"YYYY/MM/DD/Name">> for each
%% hit to a collector process, followed by the atom 'done'.

-module(wfbm3).
-export([find/3, init/0]).
-compile([native]).

%% The pattern searched for, and the same pattern reversed (match_front/4
%% compares from the last pattern byte towards the first).
-define(STR, "] \"GET /ongoing/When/").
-define(REVSTR, "/nehW/gniogno/ TEG\" ]").
-define(STRLEN, length(?STR)).
%% Length of the fixed-width date prefix that must follow ?STR.
-define(DATELEN, length("200x/2000/00/00/")).
%% Length of the leading "200x/" part that is NOT included in a reported match.
-define(MATCHHEADLEN, length("200x/")).

%% set_shifts(Chars, Count, Tbl) -> Tbl'
%% Stores, for every pattern character except the last one, its distance from
%% the end of the pattern. Later occurrences overwrite earlier ones, so each
%% character ends up with the distance of its rightmost occurrence.
set_shifts(_, Count, Tbl) when Count =:= ?STRLEN - 1 ->
    Tbl;
set_shifts([H | T], Count, Tbl) ->
    Shift = ?STRLEN - Count - 1,
    set_shifts(T, Count + 1, dict:store(H, Shift, Tbl)).

%% set_defaults(Bytes, Tbl) -> Tbl'
%% Gives every listed byte value the default shift of a full pattern length.
set_defaults([], Tbl) ->
    Tbl;
set_defaults([V | T], Tbl) ->
    set_defaults(T, dict:store(V, ?STRLEN, Tbl)).

%% init() -> dict of Byte -> Shift
%% Builds the shift table consumed by find/3. Every byte value 0..255 has an
%% entry; byte 0 is included so that a NUL byte in the input cannot make the
%% dict:fetch/2 call in match_front/4 fail.
init() ->
    set_shifts(?STR, 0, set_defaults(lists:seq(0, 255), dict:new())).

%% check_for_dot_or_space(Bin) -> {ok, Front} | {nomatch, Len}
%% Scans Bin for the first space or dot.
%%   {ok, Front}    - a space was found first; Front is the non-empty prefix
%%                    before it.
%%   {nomatch, Len} - Bin starts with a space (Len = 0), a dot was found at
%%                    offset Len, or Bin ended after Len bytes without either
%%                    character.
check_for_dot_or_space(Bin) ->
    check_for_dot_or_space(Bin, 0).

check_for_dot_or_space(<<$\s, _/binary>>, 0) ->
    {nomatch, 0};
check_for_dot_or_space(Bin, Len) when Len >= byte_size(Bin) ->
    %% Ran off the end without a terminator: report everything as skippable
    %% instead of recursing forever.
    {nomatch, Len};
check_for_dot_or_space(Bin, Len) ->
    case Bin of
        <<Front:Len/binary, $\s, _/binary>> ->
            {ok, Front};
        <<_:Len/binary, $., _/binary>> ->
            {nomatch, Len};
        _ ->
            check_for_dot_or_space(Bin, Len + 1)
    end.

%% get_tail(Bin) -> {ok, Match} | {skip, N} | nomatch
%% Bin is the data immediately following an occurrence of ?STR.
%%   {ok, Match} - Bin starts with "???x/YYYY/MM/DD/Name " (Name non-empty and
%%                 free of dots); Match is <<"YYYY/MM/DD/Name">>.
%%   {skip, N}   - the date prefix matched but the name did not; the caller
%%                 may skip N bytes of Bin.
%%   nomatch     - Bin does not start with a date prefix, including when it is
%%                 shorter than ?DATELEN bytes.
get_tail(<<_:3/binary, "x/", Y:4/binary, $/, M:2/binary, $/, D:2/binary, $/,
           Tail/binary>>) ->
    case check_for_dot_or_space(Tail) of
        {ok, Match} ->
            {ok, <<Y/binary, $/, M/binary, $/, D/binary, $/, Match/binary>>};
        {nomatch, Skip} ->
            {skip, ?DATELEN + Skip}
    end;
get_tail(_) ->
    nomatch.

%% match_front(Bin, Len, RevPattern, Tbl) -> {true, 0} | {false, Shift}
%% Compares Bin against the pattern from offset Len down to 0, consuming the
%% reversed pattern one character per step. On the first mismatch, returns the
%% number of bytes the caller should advance.
match_front(_, -1, _, _) ->
    {true, 0};
match_front(Bin, Len, [C1 | T], Tbl) ->
    <<_:Len/binary, C2:8, _/binary>> = Bin,
    if
        C1 =:= C2 ->
            match_front(Bin, Len - 1, T, Tbl);
        true ->
            Shift = dict:fetch(C2, Tbl),
            if
                %% A full-length shift is taken as is.
                Shift =:= ?STRLEN ->
                    {false, Shift};
                %% Otherwise take the smaller of the two characters' shifts.
                true ->
                    {false, erlang:min(dict:fetch(C1, Tbl), Shift)}
            end
    end.

%% find(Bin, Tbl, Pid) -> done
%% Searches Bin for every occurrence of ?STR followed by a valid dated path.
%% Each match is sent to Pid as a binary; once at most ?STRLEN bytes remain,
%% the atom 'done' is sent to Pid and returned. Tbl is the table from init/0.
find(Bin, _, Pid) when byte_size(Bin) =< ?STRLEN ->
    Pid ! done;
find(Bin, Tbl, Pid) ->
    SLen = ?STRLEN,
    <<Front:SLen/binary, Tail/binary>> = Bin,
    case match_front(Front, SLen - 1, ?REVSTR, Tbl) of
        {false, Shift} ->
            <<_:Shift/binary, Next/binary>> = Bin,
            find(Next, Tbl, Pid);
        {true, _} ->
            case get_tail(Tail) of
                {ok, Match} ->
                    %% Match omits the "200x/" head, so add it back to get the
                    %% number of bytes of Tail that were consumed.
                    Len = byte_size(Match) + ?MATCHHEADLEN,
                    <<_:Len/binary, Rest/binary>> = Tail,
                    Pid ! Match,
                    find(Rest, Tbl, Pid);
                {skip, Skip} ->
                    <<_:Skip/binary, More/binary>> = Tail,
                    find(More, Tbl, Pid);
                nomatch ->
                    find(Tail, Tbl, Pid)
            end
    end.