From 43ab8085db8dc16573e987f057759d46351c5893 Mon Sep 17 00:00:00 2001 From: rmar3a Date: Sun, 24 Sep 2023 20:30:55 -0700 Subject: [PATCH] Setting the database collation via the configuration file. - The executable comments found in the SQL files were being removed due to the logic in the prepareFile() method. - Updated areas in the codebase that set the collation to set the collation via a value drawn from the configuration file passed to FactorBase. - Updated the files in the expected-output directory since these appear to be the deterministic results when the collation is consistent for all the databases. --- .../java/ca/sfu/cs/factorbase/app/RunBB.java | 3 ++ .../database/MySQLFactorBaseDataBase.java | 32 ++++++++++++++----- .../cs/factorbase/learning/BayesBaseH.java | 6 +++- .../cs/factorbase/learning/CountsManager.java | 4 ++- .../cs/factorbase/util/MySQLScriptRunner.java | 29 ++++++++++++++--- .../sfu/cs/factorbase/util/RuntimeLogger.java | 6 ++-- .../scripts/initialize_databases.sql | 4 +-- config.cfg | 1 + travis-resources/config.cfg | 1 + .../expected-output/Bif_unielwin.xml | 2 +- .../expected-output/mysql-extraction.txt | 16 +++++----- 11 files changed, 75 insertions(+), 29 deletions(-) diff --git a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/app/RunBB.java b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/app/RunBB.java index 680efb38..155059ff 100644 --- a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/app/RunBB.java +++ b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/app/RunBB.java @@ -52,12 +52,14 @@ public static void main( RuntimeLogger.logRunTime(logger, "Logger + Config Initialization", start, configEnd); long databaseStart = System.currentTimeMillis(); + String databaseCollation = config.getProperty("dbcollation"); FactorBaseDataBase factorBaseDatabase = new MySQLFactorBaseDataBase( new FactorBaseDataBaseInfo(config), config.getProperty("dbaddress"), config.getProperty("dbname"), config.getProperty("dbusername"), config.getProperty("dbpassword"), + databaseCollation, countingStrategy ); RuntimeLogger.logRunTime(logger, "Creating Database Connection", databaseStart, System.currentTimeMillis()); @@ -92,6 +94,7 @@ public static void main( BayesBaseH.runBBH( factorBaseDatabase, globalLattice, + databaseCollation, countingStrategy ); RuntimeLogger.logRunTime(logger, "Running BayesBaseH", bayesBaseHStart, System.currentTimeMillis()); diff --git a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/database/MySQLFactorBaseDataBase.java b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/database/MySQLFactorBaseDataBase.java index d93f7b28..ca12f283 100644 --- a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/database/MySQLFactorBaseDataBase.java +++ b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/database/MySQLFactorBaseDataBase.java @@ -46,6 +46,7 @@ public class MySQLFactorBaseDataBase implements FactorBaseDataBase { private Connection dbConnection; private FactorBaseDataBaseInfo dbInfo; private Map dataExtractors; + private String dbCollation; private CountingStrategy countingStrategy; @@ -66,10 +67,12 @@ public MySQLFactorBaseDataBase( String dbname, String username, String password, + String dbCollation, CountingStrategy countingStrategy ) throws DataBaseException { this.dbInfo = dbInfo; this.baseDatabaseName = dbname; + this.dbCollation = dbCollation; this.countingStrategy = countingStrategy; String baseConnectionString = MessageFormat.format(CONNECTION_STRING, dbaddress, dbname); Properties connectionProperties = getConnectionStringProperties(username, password); @@ -89,7 +92,8 @@ public void setupDatabase() throws DataBaseException { MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "initialize_databases.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); // Switch to start using the setup database. @@ -97,12 +101,14 @@ public void setupDatabase() throws DataBaseException { MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "metadata.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "metadata_storedprocedures.sql", this.baseDatabaseName, + this.dbCollation, "//" ); MySQLScriptRunner.callSP(this.dbConnection, "find_values"); @@ -111,7 +117,8 @@ public void setupDatabase() throws DataBaseException { MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "latticegenerator_initialize.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); // Switch to start using the BN database. @@ -120,50 +127,59 @@ public void setupDatabase() throws DataBaseException { RuntimeLogger.setupLoggingTable( this.dbConnection, this.baseDatabaseName, - this.dbInfo.getBNDatabaseName() + this.dbInfo.getBNDatabaseName(), + this.dbCollation ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "latticegenerator_initialize_local.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "latticegenerator_populate.sql", this.baseDatabaseName, + this.dbCollation, "//" ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "transfer_initialize.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "transfer_cascade.sql", this.baseDatabaseName, + this.dbCollation, "//" ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "modelmanager_initialize.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "metaqueries_initialize.sql", - this.baseDatabaseName + this.baseDatabaseName, + this.dbCollation ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "metaqueries_populate.sql", this.baseDatabaseName, + this.dbCollation, "//" ); MySQLScriptRunner.runScript( this.dbConnection, Config.SCRIPTS_DIRECTORY + "metaqueries_RChain.sql", this.baseDatabaseName, + this.dbCollation, "//" ); } catch (SQLException | IOException e) { diff --git a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/BayesBaseH.java b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/BayesBaseH.java index aa7191b3..a34f26a5 100644 --- a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/BayesBaseH.java +++ b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/BayesBaseH.java @@ -108,6 +108,7 @@ public class BayesBaseH { public static void runBBH( FactorBaseDataBase database, RelationshipLattice globalLattice, + String databaseCollation, CountingStrategy countingStrategy ) throws SQLException, IOException, DataBaseException, DataExtractionException, ParsingException, ScoringException { initProgram(FirstRunning); @@ -124,6 +125,7 @@ public static void runBBH( StructureLearning( database, con2, + databaseCollation, countingStrategy, globalLattice ); @@ -206,6 +208,7 @@ public static void runBBH( private static void StructureLearning( FactorBaseDataBase database, Connection conn, + String databaseCollation, CountingStrategy countingStrategy, RelationshipLattice lattice ) throws SQLException, IOException, DataBaseException, DataExtractionException, ParsingException, ScoringException { @@ -221,7 +224,8 @@ private static void StructureLearning( MySQLScriptRunner.runScript( conn, Config.SCRIPTS_DIRECTORY + "modelmanager_populate.sql", - databaseName + databaseName, + databaseCollation ); // Handle rnodes in a bottom-up way following the lattice. diff --git a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/CountsManager.java b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/CountsManager.java index 970e60be..3d23d5c1 100644 --- a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/CountsManager.java +++ b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/learning/CountsManager.java @@ -69,6 +69,7 @@ public class CountsManager { private static boolean generatePDPInfo; private static String linkCorrelation; private static long dbTemporaryTableSize; + private static String dbCollation; /* * cont is Continuous * ToDo: Refactor @@ -93,7 +94,7 @@ public static void buildCT( RuntimeLogger.addLogEntry(dbConnection); try (Statement statement = dbConnection.createStatement()) { statement.execute("DROP SCHEMA IF EXISTS " + dbInfo.getCTDatabaseName() + ";"); - statement.execute("CREATE SCHEMA " + dbInfo.getCTDatabaseName() + " /*M!100316 COLLATE utf8_general_ci*/;"); + statement.execute("CREATE SCHEMA " + dbInfo.getCTDatabaseName() + " COLLATE " + dbCollation + ";"); } // Propagate metadata based on the FunctorSet. @@ -492,6 +493,7 @@ private static void setVarsFromConfig() { dbPassword = conf.getProperty("dbpassword"); dbaddress = conf.getProperty("dbaddress"); dbTemporaryTableSize = Math.round(1024 * 1024 * 1024 * Double.valueOf(conf.getProperty("dbtemporarytablesize"))); + dbCollation = conf.getProperty("dbcollation"); linkCorrelation = conf.getProperty("LinkCorrelations"); cont = conf.getProperty("Continuous"); String loggingLevel = conf.getProperty("LoggingLevel"); diff --git a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/MySQLScriptRunner.java b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/MySQLScriptRunner.java index babfdfdf..d3de2d1c 100644 --- a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/MySQLScriptRunner.java +++ b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/MySQLScriptRunner.java @@ -32,9 +32,14 @@ private MySQLScriptRunner() { * * @param fileName - the file to create a copy of with the variables filled in. * @param databaseName - the name of the database to replace instances of "@database@" with. + * @param databaseCollation - the collation to use for the tables created. * @throws IOException if there is an issue reading from the script. */ - private static String prepareFile(String fileName, String databaseName) throws IOException { + private static String prepareFile( + String fileName, + String databaseName, + String databaseCollation + ) throws IOException { InputStream inputStream = MySQLScriptRunner.class.getClassLoader().getResourceAsStream(fileName); if (inputStream == null) { throw new FileNotFoundException("Unable to read the file: " + fileName); @@ -59,6 +64,7 @@ private static String prepareFile(String fileName, String databaseName) throws I String finalOutput = ""; while (line != null) { line = line.replace("@database@", databaseName); + line = line.replace("@dbcollation@", databaseCollation); finalOutput += line + System.getProperty("line.separator"); line = input.readLine(); } @@ -94,11 +100,17 @@ public static void callSP(Connection dbConnection, String spName) throws SQLExce * @param dbConnection - connection to the database to execute the script on. * @param scriptFileName - the path to the MySQL script to execute. * @param databaseName - the name of the database to replace instances of "@database@" with. + * @param databaseCollation - the collation to use for the tables created. * @throws SQLException if there is an issue executing the command(s). * @throws IOException if there is an issue reading from the script. */ - public static void runScript(Connection dbConnection, String scriptFileName, String databaseName) throws SQLException, IOException { - runScript(dbConnection, scriptFileName, databaseName, ";"); + public static void runScript( + Connection dbConnection, + String scriptFileName, + String databaseName, + String databaseCollation + ) throws SQLException, IOException { + runScript(dbConnection, scriptFileName, databaseName, databaseCollation, ";"); } @@ -110,12 +122,19 @@ public static void runScript(Connection dbConnection, String scriptFileName, Str * @param dbConnection - connection to the database to execute the script on. * @param scriptFileName - the path to the MySQL script to execute. * @param databaseName - the name of the database to replace instances of "@database@" with. + * @param databaseCollation - the collation to use for the tables created. * @param delimiter - the delimiter to use when reading the commands from the given script. * @throws SQLException if there is an issue executing the command(s). * @throws IOException if there is an issue reading from the script. */ - public static void runScript(Connection dbConnection, String scriptFileName, String databaseName, String delimiter) throws SQLException, IOException { - String newScriptFileName = prepareFile(scriptFileName, databaseName); + public static void runScript( + Connection dbConnection, + String scriptFileName, + String databaseName, + String databaseCollation, + String delimiter + ) throws SQLException, IOException { + String newScriptFileName = prepareFile(scriptFileName, databaseName, databaseCollation); ScriptRunner runner = new ScriptRunner(dbConnection); try ( diff --git a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/RuntimeLogger.java b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/RuntimeLogger.java index e38cbda4..2a3c4c1b 100644 --- a/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/RuntimeLogger.java +++ b/code/factorbase/src/main/java/ca/sfu/cs/factorbase/util/RuntimeLogger.java @@ -94,13 +94,15 @@ public static void logExecutedQuery(Logger logger, String query) { public static void setupLoggingTable( Connection dbConnection, String baseDatabaseName, - String loggingTableDatabaseName + String loggingTableDatabaseName, + String databaseCollation ) throws SQLException, IOException { dbName = loggingTableDatabaseName; MySQLScriptRunner.runScript( dbConnection, Config.SCRIPTS_DIRECTORY + "logging.sql", - baseDatabaseName + baseDatabaseName, + databaseCollation ); } diff --git a/code/factorbase/src/main/resources/scripts/initialize_databases.sql b/code/factorbase/src/main/resources/scripts/initialize_databases.sql index a382d46c..11dcb1cb 100644 --- a/code/factorbase/src/main/resources/scripts/initialize_databases.sql +++ b/code/factorbase/src/main/resources/scripts/initialize_databases.sql @@ -1,7 +1,5 @@ -- Initialize the databases required by FactorBase. -/*M!100316 SET collation_server = 'utf8_general_ci';*/ - -SET collation_server = 'utf8_general_ci'; +SET collation_server = @dbcollation@; DROP SCHEMA IF EXISTS @database@_setup; CREATE SCHEMA @database@_setup; diff --git a/config.cfg b/config.cfg index 077f9626..a292ab0d 100644 --- a/config.cfg +++ b/config.cfg @@ -4,6 +4,7 @@ dbname = name_of_database dbusername = database_username dbpassword = database_password dbtemporarytablesize = 4 +dbcollation = utf8_general_ci # FactorBase Configurations AutomaticSetup = 1 diff --git a/travis-resources/config.cfg b/travis-resources/config.cfg index f6bcc213..6056ea3b 100644 --- a/travis-resources/config.cfg +++ b/travis-resources/config.cfg @@ -4,6 +4,7 @@ dbname = unielwin dbusername = root dbpassword = 123456 dbtemporarytablesize = 4 +dbcollation = utf8_general_ci # FactorBase Configurations AutomaticSetup = 1 diff --git a/travis-resources/expected-output/Bif_unielwin.xml b/travis-resources/expected-output/Bif_unielwin.xml index 60b6170d..8194567b 100644 --- a/travis-resources/expected-output/Bif_unielwin.xml +++ b/travis-resources/expected-output/Bif_unielwin.xml @@ -95,7 +95,7 @@ xsi:schemaLocation="http://www.cs.ubc.ca/labs/lci/fopi/ve/XMLBIFv0_3 http://www. capability(prof0,student0) RA(prof0,student0) salary(prof0,student0) - 0.166667 0.166667 0.166667 0.166667 0.166665 0.166667 0.166667 0.166667 0.166667 0.166667 0.166665 0.166667 0.166667 0.166667 0.166667 0.166667 0.166665 0.166667 0.000491 0.000491 0.000491 0.000491 0.000491 0.997545 0.008621 0.008621 0.181034 0.439655 0.353448 0.008621 0.375 0.375 0.196429 0.017857 0.017857 0.017857 0.322917 0.21875 0.427082 0.010417 0.010417 0.010417 0.166667 0.166667 0.166667 0.166667 0.166665 0.166667
+ 0.166667 0.166667 0.166665 0.166667 0.166667 0.166667 0.166667 0.166667 0.166665 0.166667 0.166667 0.166667 0.166667 0.166667 0.166665 0.166667 0.166667 0.166667 0.000491 0.000491 0.000491 0.000491 0.000491 0.997545 0.008621 0.008621 0.181034 0.439655 0.353448 0.008621 0.375 0.375 0.196429 0.017857 0.017857 0.017857 0.322917 0.21875 0.427082 0.010417 0.010417 0.010417 0.166667 0.166667 0.166665 0.166667 0.166667 0.166667
intelligence(student0) diff --git a/travis-resources/expected-output/mysql-extraction.txt b/travis-resources/expected-output/mysql-extraction.txt index e403e9d6..23283abb 100644 --- a/travis-resources/expected-output/mysql-extraction.txt +++ b/travis-resources/expected-output/mysql-extraction.txt @@ -951,12 +951,12 @@ Table: capability(prof0,student0)_CP_pairs 0 N/A T N/A MULT capability(prof0,student0) RA(prof0,student0) salary(prof0,student0) Table: capability(prof0,student0)_CP_smoothed -1 1 F high 6 NULL 0.166667 NULL NULL -1 1 F low 6 NULL 0.166667 NULL NULL -1 1 F med 6 NULL 0.166667 NULL NULL +1 1 F high 6 NULL 0.166665 NULL NULL +1 1 F low 6 NULL 0.166665 NULL NULL +1 1 F med 6 NULL 0.166665 NULL NULL 1 1 F N/A 2036 NULL 0.000491 NULL NULL 1 1 T high 116 NULL 0.008621 NULL NULL -1 1 T N/A 6 NULL 0.166667 NULL NULL +1 1 T N/A 6 NULL 0.166665 NULL NULL 1 2 F high 6 NULL 0.166667 NULL NULL 1 2 F low 6 NULL 0.166667 NULL NULL 1 2 F med 6 NULL 0.166667 NULL NULL @@ -975,13 +975,13 @@ Table: capability(prof0,student0)_CP_smoothed 1 4 T low 56 NULL 0.017857 NULL NULL 1 4 T med 96 NULL 0.010417 NULL NULL 1 4 T N/A 6 NULL 0.166667 NULL NULL -1 5 F high 6 NULL 0.166665 NULL NULL -1 5 F low 6 NULL 0.166665 NULL NULL -1 5 F med 6 NULL 0.166665 NULL NULL +1 5 F high 6 NULL 0.166667 NULL NULL +1 5 F low 6 NULL 0.166667 NULL NULL +1 5 F med 6 NULL 0.166667 NULL NULL 1 5 F N/A 2036 NULL 0.000491 NULL NULL 1 5 T low 56 NULL 0.017857 NULL NULL 1 5 T med 96 NULL 0.010417 NULL NULL -1 5 T N/A 6 NULL 0.166665 NULL NULL +1 5 T N/A 6 NULL 0.166667 NULL NULL 1 N/A F high 6 NULL 0.166667 NULL NULL 1 N/A F low 6 NULL 0.166667 NULL NULL 1 N/A F med 6 NULL 0.166667 NULL NULL