From c41f94ac4eb1f012c701821dda320cc1d81ef481 Mon Sep 17 00:00:00 2001
From: Satvik Vemuganti <vemugantisesha@iitbhilai.ac.in>
Date: Tue, 23 Aug 2022 02:06:29 +0530
Subject: [PATCH] metadata_dictionary: Add mappings for "*.podspec" files

---
 swh/indexer/data/podspec.csv                  |  68 +++++++++++
 swh/indexer/metadata_dictionary/__init__.py   |  18 ++-
 swh/indexer/metadata_dictionary/podspec.py    | 113 ++++++++++++++++++
 .../tests/metadata_dictionary/test_podspec.py |  65 ++++++++++
 swh/indexer/tests/test_cli.py                 |   3 +
 5 files changed, 266 insertions(+), 1 deletion(-)
 create mode 100644 swh/indexer/data/podspec.csv
 create mode 100644 swh/indexer/metadata_dictionary/podspec.py
 create mode 100644 swh/indexer/tests/metadata_dictionary/test_podspec.py

diff --git a/swh/indexer/data/podspec.csv b/swh/indexer/data/podspec.csv
new file mode 100644
index 00000000..08293602
--- /dev/null
+++ b/swh/indexer/data/podspec.csv
@@ -0,0 +1,68 @@
+Property,Podspec
+codeRepository,spec.source
+programmingLanguage,
+runtimePlatform,
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,
+softwareHelp,
+softwareRequirements,
+softwareVersion,
+storageRequirements,
+supportingData,
+author,spec.authors
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,
+license,spec.license
+producer,
+provider,
+publisher,
+sponsor,
+version,spec.version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,spec.summary
+identifier,
+name,spec.name
+sameAs,
+url,spec.homepage
+relatedLink,
+givenName,
+familyName,
+email,
+affiliation,
+identifier,
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
index 99c2504c..5ad49e0e 100644
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -8,9 +8,24 @@ from typing import Dict, Type
 
 import click
 
-from . import cff, codemeta, composer, dart, github, maven, npm, nuget, python, ruby
+from . import (
+    cff,
+    codemeta,
+    composer,
+    dart,
+    github,
+    maven,
+    npm,
+    nuget,
+    podspec,
+    python,
+    ruby,
+)
 from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
 
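+
+# Registries of mapping classes, keyed by class name.  A new mapping module
+# (such as podspec) is imported above and registered in the matching dict.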
 INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
     "CffMapping": cff.CffMapping,
     "CodemetaMapping": codemeta.CodemetaMapping,
@@ -21,6 +36,7 @@ INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
     "PythonPkginfoMapping": python.PythonPkginfoMapping,
     "ComposerMapping": composer.ComposerMapping,
     "NuGetMapping": nuget.NuGetMapping,
+    "PodspecMapping": podspec.PodspecMapping,
 }
 
 EXTRINSIC_MAPPINGS: Dict[str, Type[BaseExtrinsicMapping]] = {
diff --git a/swh/indexer/metadata_dictionary/podspec.py b/swh/indexer/metadata_dictionary/podspec.py
new file mode 100644
index 00000000..6e302f03
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/podspec.py
@@ -0,0 +1,113 @@
+import ast
+import itertools
+import os.path
+import re
+from typing import List
+
+from rdflib import Graph, Literal, URIRef
+
+from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.metadata_dictionary.base import DirectoryLsEntry
+from swh.indexer.namespaces import CODEMETA, SCHEMA
+from swh.indexer.storage.interface import Sha1
+
+from .base import DictMapping, SingleFileIntrinsicMapping
+
+PODSPEC_TABLE_PATH = os.path.join(_DATA_DIR, "podspec.csv")
+# Read the podspec crosswalk table once, when this module is imported.
+with open(PODSPEC_TABLE_PATH) as fd:
+    (CODEMETA_TERMS, PODSPEC_TABLE) = _read_crosstable(fd)
+
+
+class PodspecMapping(DictMapping, SingleFileIntrinsicMapping):
+    """Translate CocoaPods ``*.podspec`` files using the podspec crosswalk table.
+
+    Only single-line ``<var>.<attribute> = <value>`` assignments found after the
+    ``Pod::Spec.new`` line are read; values are never executed as Ruby.
+    """
+
+    name = "podspec"
+    # NOTE(review): the CSV keys carry a ``spec.`` prefix while translate() builds
+    # bare attribute names -- confirm how DictMapping matches keys/string_fields.
+    mapping = PODSPEC_TABLE["Podspec"]
+    string_fields = ["description", "name", "softwareVersion"]
+
+    _re_spec_new = re.compile(r".*Pod::Spec\.new +(do|\{) +\|.*\|.*")
+    _re_spec_entry = re.compile(r"\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)")
+    # One ``key => 'value'`` pair of a Ruby hash; only quoted values are recognized.
+    _re_hash_pair = re.compile(
+        r"(?P<key>:\w+|'[^']*'|\"[^\"]*\")\s*=>\s*(?P<q>['\"])(?P<value>.*?)(?P=q)"
+    )
+
+    @classmethod
+    def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
+        """Return the sha1 of the first entry named ``*.podspec``, if any."""
+        for entry in file_entries:
+            if entry["name"].endswith(b".podspec"):
+                return [entry["sha1"]]
+        return []
+
+    def translate(self, raw_content):
+        """Translate the bytes of a podspec file; return None if it is unusable."""
+        try:
+            raw_content = raw_content.decode()
+        except UnicodeDecodeError:
+            self.log.warning("Error unidecoding from %s", self.log_suffix)
+            return
+
+        lines = itertools.dropwhile(
+            lambda x: not self._re_spec_new.match(x), raw_content.splitlines()
+        )
+        if next(lines, None) is None:  # also consumes the ``Pod::Spec.new`` line
+            self.log.warning("Could not find Pod::Specification in %s", self.log_suffix)
+            return
+
+        content_dict = {}
+        for line in lines:
+            match = self._re_spec_entry.match(line)
+            if match:
+                value = self.eval_podspec_expression(match.group("expr"))
+                if value:
+                    content_dict[match.group("key")] = value
+        return self._translate_dict(content_dict)
+
+    def eval_podspec_expression(self, expr):
+        """Return the value of a simple Ruby literal, or None if unsupported.
+
+        Strings and lists of non-empty strings are read with ast.literal_eval (no
+        code is run); a one-line ``{...}`` hash is returned as its source text.
+        """
+
+        def only_strings(value):
+            if isinstance(value, str):
+                return value
+            if isinstance(value, list):
+                items = [only_strings(item) for item in value]
+                return items if all(items) else None
+
+        expr = expr.replace(".freeze", "").strip()
+        if expr.startswith("{") and expr.endswith("}"):
+            return expr  # Ruby hash syntax is not parseable by ``ast``
+        try:
+            return only_strings(ast.literal_eval(expr))
+        except (SyntaxError, ValueError, TypeError, RecursionError):
+            return
+
+    def translate_summary(self, graph: Graph, root, s):
+        """Add ``s`` as the schema:description of ``root``."""
+        if isinstance(s, str):
+            graph.add((root, SCHEMA.description, Literal(s)))
+
+    def parse_enum(self, enum_string):
+        """Parse a Ruby hash of quoted strings into a dict; None if not a hash."""
+        if enum_string.strip().startswith("{"):
+            pairs = self._re_hash_pair.finditer(enum_string)
+            return {pair.group("key"): pair.group("value") for pair in pairs}
+
+    def translate_source(self, graph: Graph, root, s):
+        """Add the ``:git`` URL (or a plain string value) as codeRepository."""
+        if isinstance(s, str):
+            parsed = self.parse_enum(s)
+            url = s if parsed is None else parsed.get(":git")
+            if url:
+                graph.add((root, CODEMETA.codeRepository, URIRef(url)))
diff --git a/swh/indexer/tests/metadata_dictionary/test_podspec.py b/swh/indexer/tests/metadata_dictionary/test_podspec.py
new file mode 100644
index 00000000..40412d24
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_podspec.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_podspec():
+    """Translate a sample podspec and compare the whole result with ``expected``."""
+    raw_content = b"""Pod::Spec.new do |spec|
+  spec.name          = 'Reachability'
+  spec.version       = '3.1.0'
+  spec.license       = { :type => 'BSD' }
+  spec.homepage      = 'https://github.com/tonymillion/Reachability'
+  spec.authors       = { 'Tony Million' => 'tonymillion@gmail.com' }
+  spec.summary       = 'ARC and GCD Compatible Reachability Class for iOS and OS X.'
+  spec.source        = { :git => 'https://github.com/tonymillion/Reachability.git' }
+  spec.module_name   = 'Rich'
+  spec.swift_version = '4.0'
+
+  spec.ios.deployment_target  = '9.0'
+  spec.osx.deployment_target  = '10.10'
+
+  spec.source_files       = 'Reachability/common/*.swift'
+  spec.ios.source_files   = 'Reachability/ios/*.swift', 'Reachability/extensions/*.swift'
+  spec.osx.source_files   = 'Reachability/osx/*.swift'
+
+  spec.framework      = 'SystemConfiguration'
+  spec.ios.framework  = 'UIKit'
+  spec.osx.framework  = 'AppKit'
+
+  spec.dependency 'SomeOtherPod'
+end"""
+    result = MAPPINGS["PodspecMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [
+            {"type": "Person", "name": "Tony Million", "email": "tonymillion@gmail.com"}
+        ],
+        "description": "ARC and GCD Compatible Reachability Class for iOS and OS X.",
+        "url": "https://github.com/tonymillion/Reachability",
+        "codeRepository": "https://github.com/tonymillion/Reachability.git",
+        "name": "Reachability",
+        "softwareVersion": "3.1.0",
+    }
+    assert result == expected
+
+
+def test_parse_enum():
+    """parse_enum() turns a multi-line Ruby hash into a dict of unquoted strings."""
+    ruby_hash = """{
+        :git => 'https://github.com/tensorflow/tensorflow.git',
+        :commit => 'd8ce9f9c301d021a69953134185ab728c1c248d3'
+        }
+    """
+    mapping = MAPPINGS["PodspecMapping"]()
+
+    # Keys keep their leading colon; values lose their quotes.
+    assert mapping.parse_enum(ruby_hash) == {
+        ":git": "https://github.com/tensorflow/tensorflow.git",
+        ":commit": "d8ce9f9c301d021a69953134185ab728c1c248d3",
+    }
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
index 6bbab408..242be195 100644
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -117,10 +117,13 @@ def test_cli_mapping_list(cli_runner, swh_config):
             "nuget",
             "pkg-info",
             "pubspec",
+            "podspec",
             "sword-codemeta",
             "",
         ]  # must be sorted for test to pass
     )
+    # "podspec",
+
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
-- 
GitLab