madlib-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Xiaocheng Tang <xt...@pivotal.io>
Subject Re: why the results are always different with the same SQL execution of the same SVM linear classification ...?
Date Wed, 28 Sep 2016 20:46:55 GMT
Hi George,

Could you try running the same query again but this time on the normalized data? 
Please let us know if this gives consistent results across runs.
To save you some time, below is a script that you can use to create a normalized houses table:

--
-- PostgreSQL database dump
--

-- Dumped from database version 9.5.4
-- Dumped by pg_dump version 9.5.4

-- Session setup emitted by pg_dump; these settings only affect the session
-- that runs this script.

-- No time limits while restoring (0 disables each timeout).
SET statement_timeout TO 0;
SET lock_timeout TO 0;

-- Interpret the script text as UTF-8 and treat backslashes in ordinary
-- string literals as plain characters.
SET client_encoding TO 'UTF8';
SET standard_conforming_strings TO on;

-- Skip function-body validation and hide messages below WARNING.
SET check_function_bodies TO false;
SET client_min_messages TO warning;

-- Do not apply row-level security policies during the restore.
SET row_security TO off;

-- Unqualified names below (e.g. houses_normalized) resolve to schema public.
SET search_path TO public, pg_catalog;

-- Empty string: use the database's default tablespace for new objects.
SET default_tablespace TO '';

-- Create tables without OID columns.
SET default_with_oids TO false;

--
-- Name: houses_normalized; Type: TABLE; Schema: public; Owner: -
--

-- Remove any copy left by a previous run so the script is re-runnable.
-- Without this, a second run fails at CREATE TABLE while the INSERTs that
-- follow still execute, leaving duplicated rows in the table. Dropping the
-- table also drops its index, so the later CREATE INDEX succeeds again.
-- NOTE: this discards an existing houses_normalized table in schema public.
DROP TABLE IF EXISTS houses_normalized;

-- Normalized copy of the houses example data (see the accompanying message).
-- Column order and types are unchanged from the original dump.
CREATE TABLE houses_normalized (
    index bigint,              -- row position; 0..14 in the data below
    bath double precision,     -- presumably standardized feature value
    bedroom double precision,  -- presumably standardized feature value
    id bigint,                 -- house identifier; 1..15 in the data below
    lot double precision,      -- presumably standardized feature value
    price bigint,              -- kept as raw whole numbers (e.g. 50000), not normalized
    size double precision,     -- presumably standardized feature value
    tax double precision       -- presumably standardized feature value
);


--
-- Data for Name: houses_normalized; Type: TABLE DATA; Schema: public; Owner: -
--

-- Load all 15 rows in a single multi-row INSERT.
-- Column order: index, bath, bedroom, id, lot, price, size, tax.
INSERT INTO houses_normalized
    (index, bath, bedroom, id, lot, price, size, tax)
VALUES
    (0, -1.17715125263266085, -1.23536477452051652, 1, 0.270191891767266823, 50000, -1.28990637897490767, -0.946475305602717198),
    (1, 0.428055000957331067, 0.449223554371096789, 2, -0.962762072170713412, 85000, -0.044121546162119725, -0.49421217480917512),
    (2, -1.17715125263266085, 0.449223554371096789, 3, -2.00039659627693434, 22500, -0.725410126606613193, -1.50688831549906288),
    (3, 0.428055000957331067, -1.23536477452051652, 4, -0.291351497749041011, 90000, -0.258240814301817667, -0.671184704250126329),
    (4, 0.428055000957331067, 0.449223554371096789, 5, 1.23458162593657805, 133000, 0.131066945952178604, -0.228753380647748195),
    (5, -1.17715125263266085, -1.23536477452051652, 6, 0.709660631388725038, 90500, -1.19257943891140861, -0.199257959074256308),
    (6, 1.23065812775232697, 0.449223554371096789, 7, 0.624208376462330405, 260000, 1.35738639075226675, 1.21652227645335387),
    (7, -1.17715125263266085, -1.23536477452051652, 8, 0.257984426777781828, 142500, -0.511290858466915243, -0.857989040882241594),
    (8, 0.428055000957331067, 0.449223554371096789, 9, -0.108239522906766722, 160000, 0.131066945952178604, 0.282500593292777713),
    (9, 0.428055000957331067, 2.13381188326271021, 10, 0.0138351269880827975, 240000, 2.64210199959045422, 2.09155311646694608),
    (10, -1.17715125263266085, 0.449223554371096789, 11, -0.291351497749041011, 87000, -0.783806290644712633, 0.10552806385182642),
    (11, 0.428055000957331067, 0.449223554371096789, 12, 0.0138351269880827975, 118600, -0.355567754365316735, 0.0662008350871705897),
    (12, 0.428055000957331067, 0.449223554371096789, 13, 2.21117882509537411, 140000, 0.637167034282373757, 1.52130829937943668),
    (13, 2.03326125454732276, -1.23536477452051652, 14, -0.718612772381014286, 148000, 0.228393886015677644, 0.508632158689548808),
    (14, -0.374548125837664836, 0.449223554371096789, 15, -0.962762072170713412, 65000, 0.0337400058886795293, -0.887484462455733425);


--
-- Name: ix_houses_normalized_index; Type: INDEX; Schema: public; Owner: -
--

-- Non-unique B-tree index on the "index" column of houses_normalized.
CREATE INDEX ix_houses_normalized_index
    ON houses_normalized
    USING btree (index);


--
-- PostgreSQL database dump complete
--


> On Sep 28, 2016, at 1:07 AM, George Lin <jiaxinlin@live.com> wrote:
> 
> Dear sir,
> 
> It is strange: why are the results always different when I execute the same SQL
> for the same SVM linear classification model on the same data source, as given in
> the official documentation at http://madlib.apache.org/docs/latest/group__grp__svm.html ?
> 
> the SQL is :
> DROP TABLE IF EXISTS houses_svm, houses_svm_summary;
> SELECT madlib.svm_classification('houses',
>                                  'houses_svm',
>                                  'price < 100000',
>                                  'ARRAY[1, tax, bath, size]'
>                            );
> SELECT * FROM houses_svm;
> 
> The result of running the SQL the first time is:
> {0.100414417254,-0.0184708830669,0.0251331467072,0.0154637720387}    35.9262602977    2427.20058061271    100    15    0    {f,t}
> 
> But the result of running the SQL the second time is:
> {0.129850669599,-0.00779450119676,0.0767045650808,0.00732510805457}    18.4553559161    1300.96167857298    100    15    0    {f,t}
> 
> The result is different every time this SQL is executed. Why?
> 
> 
> 
> 
> 
> Thank you!
>  
> Sincerely!
> Georgelin(Lin JiaXin)
> 0086 180 500 42436(better signal)


Mime
View raw message