% tbray15 -- another Erlang solution to Tim Bray's Wide Finder project
% Author: Steve Vinoski (http://steve.vinoski.net/), 18 October 2007.
% (The URL that originally followed "See" is missing from this copy of the file.)
%% Module name.
-module(tbray15).
%% Public entry points: start/1 and start/2 run the scan and print the
%% result; main/1 wraps them and halts the VM afterwards.
-export([start/1, start/2, main/1]).
%% NOTE(review): this is the module-only form of -import (no function
%% list). Every call to wfbm3 visible in this file is already written as
%% wfbm3:Function(...), so the attribute does not appear to be needed for
%% name resolution -- confirm that the target compiler accepts this form.
-import(wfbm3).
%% NOTE(review): `native` requests HiPE native-code compilation; HiPE was
%% removed in OTP 24 -- confirm against the OTP release this is built with.
-compile([native]).
%% Collector loop for a single block.
%%
%% Each message other than the atom `done` is treated as a key whose
%% counter in the dict is bumped by one. When `done` arrives the
%% accumulated dict is sent to Pid and the loop ends; the value of the
%% send expression (the dict itself) is the function's result.
receive_matches(Pid, Dict) ->
    receive
        done ->
            Pid ! Dict;
        Key ->
            Bumped = dict:update_counter(Key, 1, Dict),
            receive_matches(Pid, Bumped)
    end.
%% Carve the next newline-terminated block off the front of Bin and start
%% a collector process and a searcher process for it.
%%
%%   Bin     - the input that has not been handed out yet.
%%   Blksize - upper bound on the block length, counting its newline.
%%   Pid     - process the collector sends its dict to when it is done.
%%   Tbl     - value obtained from wfbm3:init/0, passed on to wfbm3:find/3.
%%
%% Returns {Rcv, Tail}: the collector's pid and the input after the block.
%%
%% NOTE(review): the binary pattern in this copy of the file was damaged
%% (it read `<>`, which is not valid Erlang and left Front and Tail
%% unbound). It is reconstructed here as "Size bytes followed by a
%% newline", which is what the decrement-and-retry fallback implies --
%% confirm against the original source.
%%
%% NOTE(review): both workers are started with plain spawn/1, so a worker
%% that dies leaves Pid waiting for a dict that never arrives.
split_and_find(Bin, Blksize, Pid, Tbl) when Blksize < 1 ->
    %% The backward search found no newline at any offset below the
    %% requested size (for example an unterminated last line). A negative
    %% size can never match, so recursing further would never terminate;
    %% hand over everything that is left as one final block instead.
    start_block(Bin, <<>>, Pid, Tbl);
split_and_find(Bin, Blksize, Pid, Tbl) ->
    Size = Blksize - 1,
    case Bin of
        <<Front:Size/binary, $\n, Tail/binary>> ->
            start_block(Front, Tail, Pid, Tbl);
        _ ->
            %% No newline at offset Size (or Bin is shorter than that):
            %% retry one byte earlier.
            split_and_find(Bin, Size, Pid, Tbl)
    end.

%% Start the collector and the searcher for one block; returns {Rcv, Tail}.
start_block(Block, Tail, Pid, Tbl) ->
    Rcv = spawn(fun() -> receive_matches(Pid, dict:new()) end),
    %% Presumably wfbm3:find/3 sends each match, then `done`, to Rcv --
    %% that module is not visible here.
    spawn(fun() -> wfbm3:find(Block, Tbl, Rcv) end),
    {Rcv, Tail}.
%% Receive one message for every element of L and merge each of them into
%% the accumulator dict, adding the values of keys present in both.
%%
%% The elements of L are not inspected; only the length of the list
%% matters. Any message in the mailbox is accepted as a table, and there
%% is no timeout, so the call blocks until enough messages have arrived.
receive_tbls([], Merged) ->
    Merged;
receive_tbls([_ | Remaining], Merged) ->
    receive
        Incoming ->
            Sum = fun(_Key, A, B) -> A + B end,
            receive_tbls(Remaining, dict:merge(Sum, Incoming, Merged))
    end.
%% Convenience form of receive_tbls/2 that starts from an empty dict.
receive_tbls(L) ->
    Empty = dict:new(),
    receive_tbls(L, Empty).
%% Walk through Bin block by block, starting workers for each block via
%% split_and_find/4, until no input is left.
%%
%% Receivers accumulates the collector pids, most recently started first,
%% and is returned once Bin is the empty binary.
scan_file(Bin, Blksize, Tbl, Me, Receivers) ->
    case Bin of
        <<>> ->
            Receivers;
        _ ->
            {Collector, Rest} = split_and_find(Bin, Blksize, Me, Tbl),
            scan_file(Rest, Blksize, Tbl, Me, [Collector | Receivers])
    end.
%% Entry point for the scan: the calling process is named as the
%% destination for the per-block dicts and the collector list starts empty.
scan_file(Bin, Blksize, Tbl) ->
    Owner = self(),
    scan_file(Bin, Blksize, Tbl, Owner, []).
%% Return up to ten {Key, Count} pairs from dict D, highest count first.
%%
%% lists:sublist/2 already returns the whole list when it holds fewer
%% than ten elements, so no separate length/1 check (a second full
%% traversal) or lists:split/2 is needed.
top_ten(D) ->
    %% The comparison is kept as a strict `>` so that the relative order
    %% of entries with equal counts is the same as before.
    Descending = lists:sort(fun({_, V1}, {_, V2}) -> V1 > V2 end,
                            dict:to_list(D)),
    lists:sublist(Descending, 10).
%% Scan File in blocks of at most Blksize bytes and print the ten most
%% frequent matches, one per line, as "Count: Key".
%%
%%   File    - path of the file to scan.
%%   Blksize - block size in bytes handed to scan_file/3.
%%
%% Returns the list of io:format/2 results (one per printed line); this
%% return shape is kept as it was for existing callers.
%%
%% Fails with a badmatch if the file cannot be opened or read.
start(File, Blksize) ->
    Tbl = wfbm3:init(),
    bfile:load_driver(),
    {ok, F} = bfile:fopen(File, "r"),
    Receivers =
        try
            {ok, Bin} = bfile:fread(F, filelib:file_size(File)),
            scan_file(Bin, Blksize, Tbl)
        after
            %% Close the handle even when the read or the scan raises, so
            %% a failed run does not leave the file open.
            bfile:fclose(F)
        end,
    L = top_ten(receive_tbls(Receivers)),
    lists:map(fun({K,V}) -> io:format("~p: ~s~n", [V, K]) end, L).
%% Scan File with a block size derived from its size: the file is divided
%% into 32 blocks per scheduler.
%%
%% The integer division yields 0 for files smaller than
%% schedulers * 32 bytes, and a zero block size can never make progress,
%% so the result is clamped to at least one byte.
start(File) ->
    BlockCount = erlang:system_info(schedulers) * 32,
    Blksize = erlang:max(1, filelib:file_size(File) div BlockCount),
    start(File, Blksize).
%% Command-line entry point; takes the argument list as strings.
%%
%%   [File, Blksize] - scan File with the given block size (decimal text).
%%   [File]          - scan File with the block size chosen by start/1.
%%
%% The VM is halted once the scan has finished.
main([F, Blksize]) ->
    Size = list_to_integer(Blksize),
    start(F, Size),
    halt();
main([F]) ->
    start(F),
    halt().