Skip to content

Commit

Permalink
Merge pull request #193 from MontrealCorpusTools/update-packaging
Browse files Browse the repository at this point in the history
Update packaging and neo4j version
  • Loading branch information
msonderegger authored Jul 11, 2024
2 parents ec25b66 + ae506d7 commit 7f40cd4
Show file tree
Hide file tree
Showing 16 changed files with 443 additions and 425 deletions.
12 changes: 6 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ name: polyglotdb-dev
channels:
- conda-forge
dependencies:
- openjdk=21
- pip
- librosa
- scipy
- praatio ~= 5.0
- scipy<=1.12.0
- praatio<=5.0
- textgrid
- influxdb
- tqdm
- future
- requests
- openjdk=11
- pip
- neo4j-python-driver
- pip:
- conch_sounds
- neo4j-driver ~= 4.3
- conch-sounds
Binary file removed polyglotdb/.DS_Store
Binary file not shown.
5 changes: 0 additions & 5 deletions polyglotdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
__ver_major__ = 1
__ver_minor__ = 2
__ver_patch__ = 1
__version__ = f"{__ver_major__}.{__ver_minor__}.{__ver_patch__}"

__all__ = ['query', 'io', 'corpus', 'config', 'exceptions', 'CorpusContext', 'CorpusConfig']

import polyglotdb.query.annotations as graph
Expand Down
11 changes: 6 additions & 5 deletions polyglotdb/acoustics/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,11 @@ def point_measures_from_csv(corpus_context, header_info, annotation_type="phone"
import_path = 'file:///{}'.format(make_path_safe(path))

import_statement = '''
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "{path}" AS csvLine
MATCH (n:{annotation_type}:{corpus_name}) where n.id = csvLine.id
SET {new_properties}'''
CALL {{
LOAD CSV WITH HEADERS FROM "{path}" AS csvLine
MATCH (n:{annotation_type}:{corpus_name}) WHERE n.id = csvLine.id
SET {new_properties}
}} IN TRANSACTIONS OF 2000 ROWS'''

statement = import_statement.format(path=import_path,
corpus_name=corpus_context.cypher_safe_name,
Expand All @@ -159,7 +160,7 @@ def point_measures_from_csv(corpus_context, header_info, annotation_type="phone"
if h == 'id':
continue
try:
corpus_context.execute_cypher('CREATE INDEX ON :%s(%s)' % (annotation_type, h))
corpus_context.execute_cypher('CREATE INDEX FOR (n:%s) ON (n.%s)' % (annotation_type, h))
except neo4j.exceptions.ClientError as e:
if e.code != 'Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists':
raise
Expand Down
2 changes: 1 addition & 1 deletion polyglotdb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import configparser

CONFIG_DIR = os.path.expanduser('~/.pgdb')
CONFIG_DIR = os.environ.get('PGDB_HOME', os.path.expanduser('~/.pgdb'))

BASE_DIR = os.path.join(CONFIG_DIR, 'data')

Expand Down
6 changes: 3 additions & 3 deletions polyglotdb/corpus/importable.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ def initialize_import(self, speakers, token_headers, subannotations=None):
w.writeheader()

def _corpus_index(tx):
tx.run('CREATE CONSTRAINT ON (node:Corpus) ASSERT node.name IS UNIQUE')
tx.run('CREATE CONSTRAINT FOR (node:Corpus) REQUIRE node.name IS UNIQUE')

def _discourse_index(tx):
tx.run('CREATE INDEX ON :Discourse(name)')
tx.run('CREATE INDEX FOR (d:Discourse) ON (d.name)')

def _speaker_index(tx):
tx.run('CREATE INDEX ON :Speaker(name)')
tx.run('CREATE INDEX FOR (s:Speaker) ON (s.name)')

def _corpus_create(tx, corpus_name):
tx.run('MERGE (n:Corpus {name: $corpus_name}) return n', corpus_name=corpus_name)
Expand Down
55 changes: 26 additions & 29 deletions polyglotdb/databases/neo4j.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ dbms.security.auth_enabled=false
# calculated based on available system resources.
# The lines below set a specific initial and maximum
# heap size; comment them out to let Neo4j calculate the values.
dbms.memory.heap.initial_size=512m
dbms.memory.heap.max_size=512m
server.memory.heap.initial_size=512m
server.memory.heap.max_size=512m

# The amount of memory to use for mapping the store files, in bytes (or
# kilobytes with the 'k' suffix, megabytes with 'm' and gigabytes with 'g').
Expand Down Expand Up @@ -66,17 +66,17 @@ dbms.memory.heap.max_size=512m
# individual advertised_address.

# Bolt connector
dbms.connector.bolt.enabled=true
server.bolt.enabled=true
#server.bolt.tls_level=OPTIONAL
dbms.connector.bolt.listen_address=:{bolt_port}
server.bolt.listen_address=:{bolt_port}

# HTTP Connector. There can be zero or one HTTP connectors.
dbms.connector.http.enabled=true
dbms.connector.http.listen_address=:{http_port}
server.http.enabled=true
server.http.listen_address=:{http_port}

# HTTPS Connector. There can be zero or one HTTPS connectors.
dbms.connector.https.enabled=false
dbms.connector.https.listen_address=:{https_port}
server.https.enabled=false
server.https.listen_address=:{https_port}

# Number of Neo4j worker threads.
#server.threads.worker_count=
Expand Down Expand Up @@ -255,7 +255,7 @@ dbms.security.allow_csv_import_from_file_urls=true
#dbms.security.http_strict_transport_security=

# Retention policy for transaction logs needed to perform recovery and backups.
dbms.tx_log.rotation.retention_policy=false
db.tx_log.rotation.retention_policy=false

# Only allow read operations from this Neo4j instance. This mode still requires
# write access to the directory for lock purposes.
Expand Down Expand Up @@ -283,31 +283,20 @@ dbms.tx_log.rotation.retention_policy=false

# G1GC generally strikes a good balance between throughput and tail
# latency, without too much tuning.
dbms.jvm.additional=-XX:+UseG1GC

# Have common exceptions keep producing stack traces, so they can be
# debugged regardless of how often logs are rotated.
dbms.jvm.additional=-XX:-OmitStackTraceInFastThrow

# Make sure that `initmemory` is not only allocated, but committed to
# the process, before starting the database. This reduces memory
# fragmentation, increasing the effectiveness of transparent huge
# pages. It also reduces the possibility of seeing performance drop
# due to heap-growing GC events, where a decrease in available page
# cache leads to an increase in mean IO response time.
# Try reducing the heap memory, if this flag degrades performance.
dbms.jvm.additional=-XX:+AlwaysPreTouch

# Trust that non-static final fields are really final.
# This allows more optimizations and improves overall performance.
# NOTE: Disable this if you use embedded mode, or have extensions or dependencies that may use reflection or
# serialization to change the value of final fields!
dbms.jvm.additional=-XX:+UnlockExperimentalVMOptions
dbms.jvm.additional=-XX:+TrustFinalNonStaticFields

# Disable explicit garbage collection, which is occasionally invoked by the JDK itself.
dbms.jvm.additional=-XX:+DisableExplicitGC

# Remote JMX monitoring, uncomment and adjust the following lines as needed. Absolute paths to jmx.access and
# jmx.password files are required.
# Also make sure to update the jmx.access and jmx.password files with appropriate permission roles and passwords,
Expand All @@ -328,10 +317,23 @@ dbms.jvm.additional=-XX:+DisableExplicitGC

# Expand the Diffie-Hellman (DH) key size from the default 1024 to 2048 for DH-RSA cipher suites used in server TLS handshakes.
# This is to protect the server from any potential passive eavesdropping.
dbms.jvm.additional=-Djdk.tls.ephemeralDHKeySize=2048

# This mitigates a DDoS vector.
dbms.jvm.additional=-Djdk.tls.rejectClientInitiatedRenegotiation=true
#********************************************************************
# Other Neo4j system properties
#********************************************************************
server.jvm.additional=-XX:+UseG1GC
server.jvm.additional=-XX:-OmitStackTraceInFastThrow
server.jvm.additional=-XX:+AlwaysPreTouch
server.jvm.additional=-XX:+UnlockExperimentalVMOptions
server.jvm.additional=-XX:+TrustFinalNonStaticFields
server.jvm.additional=-XX:+DisableExplicitGC
server.jvm.additional=-Djdk.tls.ephemeralDHKeySize=2048
server.jvm.additional=-Djdk.tls.rejectClientInitiatedRenegotiation=true
server.jvm.additional=-Dunsupported.dbms.udc.source=tarball
server.jvm.additional=--add-opens=java.base/java.nio=ALL-UNNAMED
server.jvm.additional=--add-opens=java.base/java.io=ALL-UNNAMED
server.jvm.additional=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
server.jvm.additional=-Dlog4j2.disable.jmx=true

#********************************************************************
# Wrapper Windows NT/2000/XP Service Properties
Expand All @@ -342,9 +344,4 @@ dbms.jvm.additional=-Djdk.tls.rejectClientInitiatedRenegotiation=true
# service can then be reinstalled.

# Name of the service
dbms.windows_service_name=neo4j

#********************************************************************
# Other Neo4j system properties
#********************************************************************
dbms.jvm.additional=-Dunsupported.dbms.udc.source=tarball
server.windows_service_name=neo4j
Binary file removed polyglotdb/io/.DS_Store
Binary file not shown.
Loading

0 comments on commit 7f40cd4

Please sign in to comment.