/**
 * Migrator that scans the HTML body of a Story for {@code <img>} tags, reports their
 * {@code src} values as referenced legacy ids, and later replaces each matching tag with an
 * image rich-text element that points at the migrated Image record.
 */
1public class StoryReferenceMigrator implements ReferencedContentMigrator {
// Prefix used for image legacy ids.
// NOTE(review): processObject builds its lookup key with the literal "image." instead of this constant.
3 public static final String LEGACY_PREFIX = "image.";
// Tag and attribute names used when reading <img> elements out of the parsed story HTML.
5 private static final String IMG_TAG_NAME = "img";
6 private static final String SRC_ATTR_NAME = "src";
7 private static final String ALT_ATTR_NAME = "alt";
// Tag, CSS class and attribute names used by createRteReferenceElement to build a reference button.
8 private static final String CLASS_ATTR_NAME = "class";
9 private static final String BUTTON_TAG_NAME = "button";
10 private static final String BUTTON_CSS_CLASS = "enhancement";
11 private static final String DATA_ID_ATTR_NAME = "data-id";
12 private static final String DATA_REFERENCE_ATTR_NAME = "data-reference";
/**
 * Collects the migrator classes this migrator depends on. Only StoryMigrator is registered here.
 * NOTE(review): presumably the set is returned and means "must run first" — confirm against the
 * Migrator contract.
 */
15 public Collection<Class<? extends Migrator>> dependencies() {
// A HashSet is used, so a class can only be listed once.
16 Collection<Class<? extends Migrator>> dependencies = new HashSet<>();
17 dependencies.add(StoryMigrator.class);
/**
 * Returns the context name constant declared on AcmeMigrationContext.
 */
23 public String getContextName() {
24 return AcmeMigrationContext.CONTEXT_NAME;
/**
 * Returns the migration context type this migrator works with, AcmeMigrationContext.
 */
28 public Class<AcmeMigrationContext> getContextClass() {
29 return AcmeMigrationContext.class;
/**
 * Returns the legacy id prefix of the owning content, taken from StoryMigrator.
 *
 * @param context not consulted by the visible code
 */
33 public String getLegacyIdPrefix(MigrationContext context) {
34 return StoryMigrator.LEGACY_PREFIX;
/**
 * Legacy id prefix of the referenced content.
 * NOTE(review): the body is not visible in this view; presumably it returns the image prefix
 * ("image.") — confirm.
 */
38 public String getReferencedLegacyIdPrefix(MigrationContext context) {
/**
 * Record type of the referenced content.
 * NOTE(review): the body is not visible in this view; presumably Image, since processObject casts
 * its referenced object to Image — confirm.
 */
43 public Class<? extends Recordable> getReferencedContentClass() {
/**
 * Collects the {@code src} values of all {@code <img>} tags found in a Story body.
 * Only objects that are a Story with a non-null body are inspected.
 * NOTE(review): the return statements are not visible in this view; presumably imageUrls is
 * returned for a Story and some empty/null value otherwise — confirm.
 *
 * @param obj the record to inspect
 */
48 public Collection<String> getReferencedLegacyIds(Recordable obj) {
50 if (obj instanceof Story && ((Story) obj).getBody() != null) {
// A List keeps duplicates, so an image used twice in the body is reported twice.
51 List<String> imageUrls = new ArrayList<>();
// Body items that are not Strings are ignored here.
53 for (Object item : ((Story) obj).getBody()) {
54 if (item instanceof String) {
// Each String item is parsed as an HTML fragment; Jsoup wraps it in a full document.
55 Document doc = Jsoup.parse((String) item);
56 Element body = doc.body();
58 for (Element imgElement : body.getElementsByTag(IMG_TAG_NAME)) {
// The src value is recorded exactly as written (no trimming or escaping).
59 if (imgElement.hasAttr(SRC_ATTR_NAME)) {
60 imageUrls.add(imgElement.attr(SRC_ATTR_NAME));
/**
 * Rewrites a Story body so that the {@code <img>} whose {@code src} equals {@code imageId} is
 * replaced by an image rich-text element referencing the given Image. Along the way the Image
 * gets its file (downloaded binary or URL reference), alt text, credit and title filled in
 * from the markup and is saved.
 *
 * @param context migration context; cast to AcmeMigrationContext to read isDownloadImages()
 * @param obj the owning record; cast to Story without an instanceof check
 * @param imageId compared against the raw {@code src} attribute of each {@code <img>}
 * @param refObj the referenced record; cast to Image without an instanceof check
 */
73 public void processObject(MigrationContext context, Recordable obj, String imageId, Recordable refObj) {
// Unchecked casts: a record of another type fails here with ClassCastException.
75 Story story = (Story) obj;
76 Image image = (Image) refObj;
78 if (story.getBody() != null) {
// Accumulates the rewritten HTML of the String body items, in order.
79 StringBuilder builder = new StringBuilder();
81 for (Object item : story.getBody()) {
83 if (item instanceof String) {
84 Document doc = Jsoup.parse((String) item);
// Turn off pretty-printing so the HTML written back is not re-indented by Jsoup.
85 doc.outputSettings().prettyPrint(false);
86 Element body = doc.body();
88 // Replace Images with image references.
89 for (Element imgElement : body.getElementsByTag(IMG_TAG_NAME)) {
90 String imgSrc = imgElement.attr(SRC_ATTR_NAME);
// Only the <img> whose raw (untrimmed) src matches imageId is handled in this call.
92 if (imgElement.hasAttr(SRC_ATTR_NAME) && imageId.equals(imgSrc)) {
94 if (imgElement.parent() == null) {
95 // This can happen if same image is referenced twice - it will have been replaced already
96 // and will error on subsequent iterations.
101 // Escape whitespace in URL.
// NOTE(review): replaceAll takes a regex; these patterns are plain literals, so replace(...) would do.
102 String cleanImgSrc = imgSrc.trim().replaceAll(" ", "%20").replaceAll("\r", "").replaceAll("\n", "");
103 Image checkImage = null;
105 // Save binary or just a reference (DEV) to image.
106 StorageItem storageItem;
108 if (((AcmeMigrationContext) context).isDownloadImages()) {
// Look for an Image already stored under this legacy id.
// NOTE(review): the "image." literal duplicates LEGACY_PREFIX.
109 checkImage = Query.from(Image.class)
110 .where("migration.legacyId = ?", "image." + cleanImgSrc)
113 // Don't download and save again if already pulled binary or if only saved URL reference.
// The condition is true when no Image exists yet, or the existing one has no file or only a URL reference.
114 if (checkImage == null || (checkImage.getFile() instanceof UrlStorageItem) || (checkImage.getFile() == null)) {
115 storageItem = StorageItem.Static.create();
// The file name is everything after the last "/" of the cleaned URL.
117 String fileName = cleanImgSrc.substring(cleanImgSrc.lastIndexOf("/") + 1);
118 String hash = StringUtils.hex(StringUtils.md5(fileName));
120 // Always use a deterministic filename
// Path layout: <first two hash chars>/<hash>/<fileName>.
121 storageItem.setPath(hash.substring(0, 2) + "/" + hash + "/" + fileName);
// NOTE(review): the index is computed on cleanImgSrc but applied to imgSrc. If trimming or
// escaping changed the string, the extension (and so the content type) is cut at the wrong offset.
123 String extension = imgSrc.substring(cleanImgSrc.lastIndexOf(".") + 1);
124 storageItem.setContentType("image/" + extension);
// Fetch the binary only when it is not already in storage.
126 if (!storageItem.isInStorage()) {
127 URL url = new URL(cleanImgSrc);
128 URLConnection c = url.openConnection();
129 c.setConnectTimeout(3000);
130 c.setReadTimeout(4000);
// NOTE(review): the data is read via url.openStream(), which opens a separate connection, so the
// timeouts set on c do not apply to this read in the visible code. The stream is not visibly closed.
132 storageItem.setData(new ByteArrayInputStream(IoUtils.toByteArray(url.openStream())));
136 // Add Image field values.
137 image.setFile(storageItem);
// URL-only storage item; presumably the branch taken when images are not downloaded — confirm.
141 storageItem = StorageItem.Static.createUrl(cleanImgSrc);
143 // Add Image field values.
144 image.setFile(storageItem);
// Carry the alt text over when the tag has one.
147 if (imgElement.hasAttr(ALT_ATTR_NAME)) {
148 image.setAltText(imgElement.attr(ALT_ATTR_NAME));
// The credit is taken from the first <em> matched in the element that follows the image.
152 Element sibling = imgElement.nextElementSibling();
153 if (sibling != null) {
154 Element creditElement = sibling.select("em").first();
156 if (creditElement != null) {
157 image.setCredit(creditElement.text());
// The title is the file name up to its first dot, taken from the raw (uncleaned) src.
163 String fileName = imgSrc.substring(imgSrc.lastIndexOf("/") + 1);
164 String title = fileName.split("\\.")[0];
165 image.setTitle(title);
166 image.as(Site.ObjectModification.class).setGlobal(true);
// Saved before the reference is built.
168 image.saveImmediately();
170 // Replace <img /> tag with Reference as custom RTE tag.
171 Element rte = doc.createElement(ImageRichTextElement.TAG_NAME)
172 .attr(ImageRichTextElement.TAG_ATTR_NAME_ID, image.getId().toString())
// Surround the image with line breaks (one before, two after) before swapping it out.
175 imgElement.before("<br/>").after("<br/><br/>");
177 imgElement.replaceWith(rte);
// Use the image as the story's promo image if none is set yet.
179 if (story.as(Promotable.Data.class).getPromoImage() == null) {
180 story.as(Promotable.Data.class).setPromoImage(image);
// NOTE(review): broad catch; the handler is not visible in this view — confirm the failure is logged.
183 } catch (Exception e) {
// Append this item's HTML, rewritten or not, to the new body.
189 builder.append(body.html());
// Replace the story body with a single ReferentialText built from the accumulated HTML.
// NOTE(review): non-String body items do not appear to be carried over — confirm this is intended.
193 if (builder.length() > 0) {
194 ReferentialText refText = new ReferentialText();
195 refText.addHtml(builder.toString());
197 story.setBody(refText);
/**
 * Builds a {@code <button class="enhancement">} element whose {@code data-id} is the object id and
 * whose {@code data-reference} is the JSON form of the given values.
 * NOTE(review): no caller appears in the visible code (processObject creates an
 * ImageRichTextElement tag instead) — confirm whether this helper is still needed.
 *
 * @param doc document used to create the element
 * @param objectId id written to the data-id attribute; must not be null
 * @param simpleValues values serialized to JSON for the data-reference attribute
 * @return the newly created element
 */
202 private Element createRteReferenceElement(Document doc, UUID objectId, Map<String, Object> simpleValues) {
203 return doc.createElement(BUTTON_TAG_NAME)
204 .attr(CLASS_ATTR_NAME, BUTTON_CSS_CLASS)
205 .attr(DATA_ID_ATTR_NAME, objectId.toString())
206 .attr(DATA_REFERENCE_ATTR_NAME, ObjectUtils.toJson(simpleValues));