1:- module(crp_tagged, 2 [ empty_classes/1 3 , dec_class//3 4 , inc_class//1 5 , remove_class//1 6 , add_class//2 7 8 , crp_prob/5 9 , crp_sample/5 10 , crp_sample_obs/7 11 , crp_sample_obs/8 12 , crp_sample_rm/5 13 14 , dp_sampler_teh/3 15 , py_sampler_teh/4 16 ]).

Chinese Restaurant Process utilities (alternative version)

This module provides some building blocks for implementing a family of random processes related to Dirichlet processes, including Pitman Yor processe, the Chinese Restaurant process, and the stick breaking model (GEM). The Dirichlet processes takes a single concentration parameter, representated as dp(Conc), while the Pitman Yor process takes a concentration parameter and a discount parameter, representated as py(Conc,Disc).

gem_param   ---> dp(nonneg) ; py(nonneg,0--1).
gamma_prior ---> gamma(nonneg, nonneg).
beta_prior  ---> beta(nonneg, nonneg).
classes(A)  ---> classes(natural, list(nonneg), list(A)).
action(A)   ---> new ; old(A, class_idx).
action      ---> new ; old(class_idx).

rndstate  == plrand:state
class_idx == natural
prob      == 0--1

param_sampler == pred(+gem_param, -gem_param, +rndstate, -rndstate).

This may seems like a very low-level library for building CRPs, leaving a lot for the implemeenter to do, but this is intentional, to allow the implementer freedom to decide how to manage the states (terms of type classes(_)) of one or more CRPs, as well as the state of the random generator, in whatever way is most appropriate. See the the example implementation of test_crp.pl for one way to do this. */

49% :- use_module(library(dcg_core)). 50% :- use_module(library(dcg_macros)). 51:- use_module(library(apply_macros)). 52:- use_module(library(plrand), [crp_prob/5]). 53:- use_module(library(math), [sub/3, equal/3, stoch/3, mul/3]). 54:- use_module(library(prob/tagged), [discrete//2]).

62% crp_prob( Alpha, classes(_,Counts,Vals), X, PProb, P) :- 63% counts_dist( Alpha, Counts, Counts1), 64% stoch( Counts1, Probs, _), 65% maplist( equal(X), Vals, Mask), 66% maplist( mul, [PProb | Mask], Probs, PostProbs), 67% sumlist( PostProbs, P).

78crp_sample( Alpha, classes(_,Counts,Vals), Action, RS1, RS2) :- 79 counts_dist(Alpha, Counts, Counts1), 80 stoch(Counts1, Probs, _), 81 discrete(Probs,Z,RS1,RS2), 82 ( Z>1 -> succ(C,Z), nth1(C,Vals,X), Action=old(X,C) 83 ; Action=new).

97crp_sample_obs(GEM, classes(_,Counts,Vals), X, PBase, Action, RS1, RS2) :- 98 counts_dist( GEM, Counts, [CNew|Counts1]), 99 PNew is CNew*PBase, 100 maplist( posterior_count(X),Vals,Counts1,Counts2), 101 stoch([PNew|Counts2], Probs, _), 102 discrete(Probs, Z, RS1, RS2), 103 (Z=1 -> Action=new; succ(C,Z), Action=old(C)). 104 105crp_sample_obs( GEM, classes(_,Counts,Vals), X, PBase, A, ProbX, RS1, RS2) :- 106 counts_dist( GEM, Counts, [CNew|Counts1]), 107 PNew is CNew*PBase, 108 maplist( posterior_count(X),Vals,Counts1,Counts2), 109 sumlist([CNew|Counts1], Total), 110 stoch([PNew|Counts2], Probs, TotalX), 111 ProbX is TotalX / Total, 112 discrete(Probs, Z, RS1, RS2), 113 (Z=1 -> A=new; succ(C,Z), A=old(C)).

121crp_sample_rm( classes(_,Counts,Vals), X, Class, RS1, RS2) :- 122 maplist(posterior_count(X),Vals,Counts,Counts1), 123 stoch(Counts1, Probs, _), 124 discrete(Probs, Class, RS1, RS2). 125 126 127 128% -------------------------------------------------------------------------------- 129% classes data structure (basic CRP stuff)

142dec_class(N,CI,X,classes(K,C1,Vs),classes(K,C2,Vs)) :- dec_nth(N,CI,C1,C2), nth1(N,Vs,X). 143dec_nth(1,Y,[X|T],[Y|T]) :- succ(Y,X). 144dec_nth(N,Y,[X|T1],[X|T2]) :- succ(M,N), dec_nth(M,Y,T1,T2).

165add_class(X,N2,classes(N1,C1,V1),classes(N2,C2,V2)) :- 166 succ(N1,N2), 167 append(C1,[1],C2), 168 append(V1,[X],V2). 169 170 171remove_nth(1,[_|T],T). 172remove_nth(N,[Y|T1],[Y|T2]) :- 173 ( var(N) 174 -> remove_nth(M,T1,T2), succ(M,N) 175 ; succ(M,N), remove_nth(M,T1,T2) 176 ). 177 178posterior_count(X,Val,Count,PC) :- X=Val -> PC=Count; PC=0. 179 180% ----------------------------------------------------------- 181% Dirichlet process and Pitman-Yor process 182% pseudo-counts models. 183 184counts_dist(dp(Alpha),Counts,[Alpha|Counts]) :- !. 185counts_dist(py(_,_),[],[1]) :- !. 186counts_dist(py(Alpha,Discount),Counts,[CNew|Counts1]) :- !, 187 length(Counts,K), 188 CNew is Alpha+Discount*K, 189 maplist(sub(Discount),Counts,Counts1). 190 191% --------------------------------------------------------------- 192% PARAMETER SAMPLING 193% Initialisers in Prolog, samplers written in C.