1

For a simulation/data-mining task on graph data I need to write thousands of rows at once into a PostgreSQL 9.5 database with high performance. Currently I'm doing that with prepared statements and row-by-row INSERTs. It works fine — but it is too slow. I couldn't find anything about how to write data from a collection (array, Vector or ArrayList) into a PostgreSQL table with the COPY statement via CopyManager. I would be grateful for a simple example. How do I deal with data types? CopyManager does not ask for any data types.

My current Code:

package io;

import db.PostgresConnector;
import entitiesP.Edge;
import entitiesP.Graph;
import entitiesP.Node;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Timestamp;



public class DbLogger {

    // SQL is defined once so each statement is prepared exactly once per run,
    // instead of being re-prepared for every inserted row.
    private static final String GRAPH_SQL =
            "INSERT INTO public.GRAPH_DATA (SIM_NO,NUMBER_OF_NODES,NUMBER_OF_EDGES,RADIUS,DIAMETER,EFF_DIAMETER,MAX_DEGREE,START_TIME,TIME_STAMP) VALUES (?,?,?,?,?,?,?,?,?)";
    private static final String NODE_SQL =
            "INSERT INTO public.NODE_DATA (SIM_NO,NODE_ID,DEGREE,CLUSTERING_COEFFICIENT,TIME_STAMP) VALUES (?,?,?,?,?)";
    private static final String RELATION_SQL =
            "INSERT INTO public.RELATION_DATA (SIM_NO,NODE_ID1,NODE_ID2,DISTANCE,TIE_STRENGTH,NEIGHBORHOOD_OVERLAP,TIME_STAMP) VALUES (?,?,?,?,?,?,?)";

    /**
     * Writes one complete simulation snapshot (graph summary, per-node data and
     * per-node-pair relation data) to the database.
     *
     * <p>All rows are written with batched prepared statements inside a single
     * transaction. The original implementation prepared and executed a fresh
     * statement for every row in auto-commit mode, which was the performance
     * bottleneck; it also leaked the connection when an insert failed.
     *
     * @param g     the graph to log (nodes, edges, distance matrix)
     * @param simNr simulation run number, stored in every row
     * @throws Exception if the database connection or any insert fails
     */
    public DbLogger(Graph g, int simNr) throws Exception {

        long timestamp = getTimeStamp();

        // try-with-resources closes the connection even if an insert throws.
        try (Connection conn = new PostgresConnector().getConnection()) {

            // One transaction for the whole snapshot: far faster than one
            // auto-committed INSERT per row, and atomic as a bonus.
            conn.setAutoCommit(false);

            // 1. Graph summary (a single row).
            // Compute radius/diameter once instead of three times.
            int[] rad = g.getRadiusAndDiameter();
            logGraphData(conn, simNr,
                         g.getNodes().size(),
                         g.getEdges().size(),
                         rad[0], rad[1], rad[2],
                         Node.maxDegree,
                         g.getSimStartTime(),
                         timestamp);

            // 2. Node data, one batched statement for all nodes.
            try (PreparedStatement ps = conn.prepareStatement(NODE_SQL)) {
                for (Node n : g.getNodes()) {
                    ps.setInt(1, simNr);
                    ps.setInt(2, n.getId());
                    ps.setInt(3, n.getDegree());
                    ps.setDouble(4, n.getClusteringCoefficient());
                    ps.setTimestamp(5, new Timestamp(timestamp));
                    ps.addBatch();
                }
                ps.executeBatch();
            }

            // 3. Relation data, one batched statement for all node pairs.
            try (PreparedStatement ps = conn.prepareStatement(RELATION_SQL)) {

                // Read-only access, so no defensive clone is needed (the
                // original's clone() was shallow anyway and shared the rows).
                int[][] distance = g.getDistanceMatrix();

                // Adjacent pairs: one row per direction (A--B and B--A).
                for (Object o : g.getEdges().toArray()) {
                    Edge e = (Edge) o;
                    Object[] pair = e.getNodes().toArray();   // the edge's two endpoints
                    Node a = (Node) pair[0];
                    Node b = (Node) pair[1];
                    // Node ids are 1-based; the distance matrix is 0-based.
                    int dist = distance[a.getId() - 1][b.getId() - 1];
                    double overlap = e.getNeighborhoodOverlap(a, b);
                    addRelationBatch(ps, simNr, a.getId(), b.getId(), dist, e.getTieStrength(), overlap, timestamp);
                    addRelationBatch(ps, simNr, b.getId(), a.getId(), dist, e.getTieStrength(), overlap, timestamp);
                }

                // Non-adjacent pairs: tie strength / overlap are marked -1.
                int numNodes = g.getNodes().size();
                for (int i = 0; i < numNodes; i++) {
                    for (int j = 0; j < numNodes; j++) {
                        if (distance[i][j] != 1 && i != j) {
                            // distance 0 means "unreachable" -> store 999 as the infinity marker
                            int d = (distance[i][j] == 0) ? 999 : distance[i][j];
                            addRelationBatch(ps, simNr, i + 1, j + 1, d, -1, -1, timestamp);
                        }
                    }
                }

                ps.executeBatch();
            }

            conn.commit();
        }
    }

    /**
     * Inserts the single GRAPH_DATA summary row for this simulation run.
     *
     * @throws SQLException if the insert fails
     */
    private static void logGraphData(Connection con, int simNo, int numberOfNodes, int numberOfEdges,
                                     int radius, int diameter, int effDiameter, int maxDegree,
                                     long startTime, long timestamp) throws SQLException {
        try (PreparedStatement ps = con.prepareStatement(GRAPH_SQL)) {
            ps.setInt(1, simNo);
            ps.setInt(2, numberOfNodes);
            ps.setInt(3, numberOfEdges);
            ps.setInt(4, radius);
            ps.setInt(5, diameter);
            ps.setInt(6, effDiameter);
            ps.setInt(7, maxDegree);
            ps.setTimestamp(8, new Timestamp(startTime));
            ps.setTimestamp(9, new Timestamp(timestamp));
            ps.executeUpdate();
        }
    }

    /**
     * Binds one RELATION_DATA row to the given prepared statement and adds it
     * to the statement's current batch (does not execute it).
     *
     * @throws SQLException if a parameter cannot be set
     */
    private static void addRelationBatch(PreparedStatement ps, int simNo, int nodeId1, int nodeId2,
                                         int distance, int tieStrength, double neighborhoodOverlap,
                                         long timeStamp) throws SQLException {
        ps.setInt(1, simNo);
        ps.setInt(2, nodeId1);
        ps.setInt(3, nodeId2);
        ps.setInt(4, distance);
        ps.setInt(5, tieStrength);
        ps.setDouble(6, neighborhoodOverlap);
        ps.setTimestamp(7, new Timestamp(timeStamp));
        ps.addBatch();
    }

    /** Returns the current wall-clock time in milliseconds since the epoch. */
    public static long getTimeStamp() {
        return System.currentTimeMillis();
    }

}
2
  • Preparing the statement each time is slow, and you should batch the inserts to speed them up. Check out stackoverflow.com/questions/3784197/… Commented Jun 30, 2016 at 21:57
  • Thanks Konrad — I'm going to try that out. In the meantime I found out how to use CopyManager as a special bulk-insert facility for PostgreSQL. Commented Jul 1, 2016 at 18:54

1 Answer 1

2

I found the solution for my problem: it works very fast :)

package io;


import db.PostgresConnector;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.SQLException;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;


public class DbLogger3 {

    /**
     * Demonstrates bulk-loading rows into PostgreSQL with the COPY protocol
     * via the JDBC driver's {@link CopyManager}.
     *
     * <p>Fixes over the original: {@code String +=} inside the 5000-iteration
     * loop was O(n^2) (each append copied the whole string) and is replaced by
     * a {@link StringBuilder}; {@code getBytes()} relied on the platform
     * default charset and now uses UTF-8 explicitly; the connection is closed
     * via try-with-resources instead of being leaked.
     *
     * @param args unused
     * @throws Exception if the connection or the COPY operation fails
     */
    public static void main(String[] args) throws Exception {

        try (Connection conn = new PostgresConnector().getConnection()) {

            CopyManager copyManager = new CopyManager((BaseConnection) conn);

            // Build 5000 records. COPY treats each line feed as the end of a
            // record, so every row is terminated with \r\n.
            StringBuilder rows = new StringBuilder();
            for (int i = 0; i < 5000; i++) {
                rows.append("2;100;99;6.00;12.00;11.00;13;28.06.2016 00:08;28.06.2016 00:08").append("\r\n");
            }

            // Encode with an explicit charset; the platform default may not
            // match what the database expects.
            byte[] bytes = rows.toString().getBytes(StandardCharsets.UTF_8);

            // Stream the encoded rows into the target table
            // (in my case: public.graph_data).
            try (ByteArrayInputStream input = new ByteArrayInputStream(bytes)) {
                copyManager.copyIn("COPY public.graph_data FROM STDIN WITH DELIMITER ';'", input);
            }

        } catch (SQLException | IOException e) {
            // Preserve the original cause for callers (never drop it).
            throw new Exception(e);
        }

    }
}
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.