sdap-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From eamonf...@apache.org
Subject [incubator-sdap-ingester] branch dev updated: SDAP-300: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively. (#27)
Date Tue, 05 Jan 2021 18:06:42 GMT
This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git


The following commit(s) were added to refs/heads/dev by this push:
     new 7e04251  SDAP-300: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively. (#27)
7e04251 is described below

commit 7e042510eea58a423aad4d4634d69ae01ce73146
Author: Eamon Ford <eamonford@gmail.com>
AuthorDate: Tue Jan 5 10:05:04 2021 -0800

    SDAP-300: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively. (#27)
    
    Co-authored-by: Eamon Ford <eamon.d.ford@jpl.nasa.gov>
---
 .../collection_manager/entities/Collection.py              |  7 ++++---
 .../collection_manager/services/CollectionWatcher.py       | 14 +++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/collection_manager/collection_manager/entities/Collection.py b/collection_manager/collection_manager/entities/Collection.py
index 7a45b66..389e135 100644
--- a/collection_manager/collection_manager/entities/Collection.py
+++ b/collection_manager/collection_manager/entities/Collection.py
@@ -1,11 +1,12 @@
 import os
-from urllib.parse import urlparse
+import pathlib
 from dataclasses import dataclass
 from datetime import datetime
+from enum import Enum
 from fnmatch import fnmatch
 from glob import glob
 from typing import List, Optional
-from enum import Enum
+from urllib.parse import urlparse
 
 from collection_manager.entities.exceptions import MissingValueCollectionError
 
@@ -68,6 +69,6 @@ class Collection:
                 raise IsADirectoryError()
 
             if os.path.isdir(self.path):
-                return os.path.dirname(file_path) == self.path
+                return pathlib.Path(self.path) in pathlib.Path(file_path).parents
             else:
                 return fnmatch(file_path, self.path)
diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py
index b1aaf4e..68b013a 100644
--- a/collection_manager/collection_manager/services/CollectionWatcher.py
+++ b/collection_manager/collection_manager/services/CollectionWatcher.py
@@ -116,11 +116,16 @@ class CollectionWatcher:
         logger.info(f"Scanning files for {len(collections)} collections...")
         start = time.perf_counter()
         for collection in collections:
-            for granule_path in glob(collection.path, recursive=True):
+            for granule_path in self._get_files_at_path(collection.path):
                 modified_time = int(os.path.getmtime(granule_path))
                 await self._granule_updated_callback(granule_path, modified_time, collection)
         logger.info(f"Finished scanning files in {time.perf_counter() - start} seconds.")
 
+    def _get_files_at_path(self, path: str) -> List[str]:
+        if os.path.isfile(path):
+            return [path]
+        return [f for f in glob(path + '/**', recursive=True) if os.path.isfile(f)]
+
     async def _reload_and_reschedule(self):
         try:
             updated_collections = self._get_updated_collections()
@@ -191,11 +196,14 @@ class _GranuleEventHandler(FileSystemEventHandler):
 
     def on_created(self, event):
         super().on_created(event)
-        self._handle_event(event)
+        if isinstance(event, S3Event) or not event.is_directory:
+            self._handle_event(event)
 
     def on_modified(self, event):
         super().on_modified(event)
-        self._handle_event(event)
+
+        if isinstance(event, S3Event) or not event.is_directory:
+            self._handle_event(event)
 
     def _handle_event(self, event):
         path = event.src_path


Mime
View raw message